In [None]:
import sys
# Add the parent directory to the module search path so the project-local imports
# below resolve (presumably global_variables.py and utils.py live there -- confirm).
sys.path.append('..')

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from pathlib import Path
import numpy as np
import xgboost as xgb
import plotly.express as px
import folium
from global_variables import BRANDS, GEMSTONES, TAUX_CHANGE

from utils import get_sample_lot

In [None]:
# Root folder holding the input data, relative to the notebook's working directory.
DATA_PATH = Path('..') / 'data'
# When True, diagnostic summaries are shown right after loading.
VERBOSE = True

In [None]:
# Load the pre-processed auction lots.
# NOTE(review): read_pickle executes arbitrary code on load -- only use it on files
# produced by this project's own pipeline.
df = pd.read_pickle(DATA_PATH / 'processed/certif_one_gem_processed_data.pkl')
if VERBOSE:
    # DataFrame.info() prints its summary itself and returns None; wrapping it in
    # display() only added a stray "None" to the cell output.
    df.info()

In [None]:
# Keep only the lots whose carat value is at least 1.
df = df.loc[df['carat'] >= 1].copy()

In [None]:
# Number of lots per raw location value.
df['location'].value_counts()

# Carat analysis

In [None]:
# Independent copy of the lots, indexed by StartDate (used for the resampling below).
carat_df = df.copy()
carat_df = carat_df.set_index('StartDate')

In [None]:
# Number of lots per colour value.
carat_df['color'].value_counts()

In [None]:
# Realised price divided by carat weight.
carat_df['price_per_ct'] = carat_df['PriceRealised'].div(carat_df['carat'])

In [None]:
# Per gemstone and per '1m' resampling bucket: mean price per carat and number of lots.
grouped_time_carat_price = (
    carat_df
    .groupby(['gemstone'])
    .resample('1m')
    .agg({'price_per_ct': 'mean', 'lot_id': 'count'})
)

In [None]:
# Display the aggregated table (indexed by gemstone and StartDate).
grouped_time_carat_price

In [None]:
# Bar chart of the mean price per carat over time, one colour per gemstone.
px.bar(
    data_frame=grouped_time_carat_price.reset_index(),
    x='StartDate',
    y='price_per_ct',
    color='gemstone',
)


In [None]:
# Show the diamond lots. Optional drill-down that was left disabled:
#   .loc['2020-11'].sort_values(by=['price_per_ct'], ascending=False)
carat_df.loc[carat_df['gemstone'] == 'diamond']

In [None]:
# Disabled line plot of the sapphire price per carat.
# NOTE(review): the aggregated index is built from groupby(['gemstone']), so the level is
# named 'gemstone'; the original level='main_gemstone' would raise a KeyError.
# px.line(grouped_time_carat_price.xs('sapphire', level='gemstone')['price_per_ct'])
# Inspect one lot via the project helper (presumably 180268 is a lot id -- confirm in utils).
get_sample_lot(carat_df, 180268)

# Diamond analysis

In [None]:
# Bar chart of the number of lots per gemstone.
lots_per_gemstone = df.groupby('gemstone')['lot_id'].count()
px.bar(lots_per_gemstone)

In [None]:
# Independent copy restricted to diamond lots.
diamond_df = carat_df.loc[carat_df['gemstone'] == 'diamond'].copy()

In [None]:
# Share of diamond lots per clarity grade.
fig = px.pie(
    data_frame=diamond_df,
    names='clarity',
    title='Diamond Clarity Distribution',
)
fig.show()

In [None]:
# Bar chart of the mean price per carat for each clarity grade.
mean_price_per_ct_by_clarity = diamond_df.groupby('clarity')['price_per_ct'].mean()
px.bar(mean_price_per_ct_by_clarity)

In [None]:
# Mean realised price per clarity grade, as a one-column DataFrame ("moy" = mean).
price_moy = diamond_df.groupby('clarity')[['PriceRealised']].mean()

In [None]:
# Box plot of the realised price for each clarity grade.
px.box(
    data_frame=diamond_df,
    x='clarity',
    y='PriceRealised',
)

# biggest sales

In [None]:
# Diamond lots whose realised price is at least 1,000,000.
biggest_sales = diamond_df.loc[diamond_df['PriceRealised'] >= 1_000_000].copy()

In [None]:
# Inspect one lot via the project helper (presumably 217258 is a lot id -- confirm in utils).
get_sample_lot(biggest_sales, 217258)

In [None]:
# Carat distribution of the biggest diamond sales.
px.histogram(
    data_frame=biggest_sales,
    x='carat',
)

In [None]:
# Share of the biggest diamond sales per value of the 'fancy' column.
# The title used to read "Diamond Clarity Distribution" (copy-paste from the clarity
# chart) although the slices are built from 'fancy'.
fig = px.pie(
    biggest_sales,
    names='fancy',
    title='Fancy Distribution of Diamonds Sold for 1,000,000 or More',
)
fig.show()

# Unsold lots

In [None]:
# Diamond lots with no realised price.
unsold_df = diamond_df.loc[diamond_df['PriceRealised'].isna()].copy()

In [None]:
# Share of the unsold diamond lots per value of the 'fancy' column.
# The title used to read "Diamond Clarity Distribution" (copy-paste from the clarity
# chart) although the slices are built from 'fancy'.
fig = px.pie(
    unsold_df,
    names='fancy',
    title='Fancy Distribution of Unsold Diamonds',
)
fig.show()

# Location analysis

In [None]:
import folium
from folium.plugins import MarkerCluster
import pandas as pd

# Normalise spelling variants of the raw location labels.
df['location'] = df['location'].replace({'ceylan': 'ceylon', 'siam': 'thailand', 'mozambi': 'mozambique'})

# Raw location label -> country name used for the map lookups further down.
name_mapping = {
    'colombia': 'Colombia',
    'burma': 'Myanmar',
    'tajikistan': 'Tajikistan',
    'ceylon': 'Sri Lanka',
    'madagascar': 'Madagascar',
    'tanzania': 'Tanzania',
    'ethiopia': 'Ethiopia',
    'zambi': 'Zambia',
    'kashmir': 'Kashmir',  # Note: Kashmir might not be a country in the GeoDataFrame
    'thailand': 'Thailand',
    'siam': 'Thailand',  # Siam is an old name for Thailand
    'mozambi': 'Mozambique',
    # The replace() above rewrites 'mozambi' to 'mozambique'; without this key every
    # Mozambique lot was turned into NaN by the later .map(name_mapping).
    'mozambique': 'Mozambique',
    'malawi': 'Malawi'
}

# Count occurrences of each (normalised, not yet renamed) location
location_counts = df['location'].value_counts()

In [None]:
# Translate raw location labels into country names.
# A plain .map(name_mapping) is not idempotent: running the cell a second time looks up
# the already-translated names, finds none of them among the keys and turns the whole
# column into NaN. Falling back to the current value makes re-runs harmless; labels with
# no mapping are therefore kept as-is instead of becoming NaN.
df['location'] = df['location'].map(lambda label: name_mapping.get(label, label))

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
from collections import Counter

# Tally the lots per location value (the following cell computes the same Counter again).
country_counts = Counter(df.location)

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
from collections import Counter

# Tally the lots per location value.
country_counts = Counter(df.location)

# Read the country polygons from the Natural Earth shapefile under DATA_PATH.
shapefile = DATA_PATH / 'ne_110m_admin_0_countries' / 'ne_110m_admin_0_countries.shp'
world = gpd.read_file(shapefile)


In [None]:

# Attach the lot count to each country row (0 when the country has no lots).
world['color'] = [country_counts.get(country, 0) for country in world['NAME']]




In [None]:
# Countries with at least one lot, reduced to the columns needed for mapping.
sub_world = world.loc[world['color'] > 0, ['NAME', 'color', 'geometry']].copy()

In [None]:
# Display the countries that have at least one lot, with their counts.
sub_world

In [None]:
# Create a base map
m = folium.Map(location=[0, 0], zoom_start=2)

# Create a MarkerCluster layer for better visualization of markers
marker_cluster = MarkerCluster().add_to(m)

# Centroid of every country polygon, keyed by the country name used in `world`.
country_centroids = {
    name: geometry.centroid
    for name, geometry in zip(world['NAME'], world.geometry)
}

# Add one marker per country.
# The keys of country_counts are already country names (the location column was
# translated before counting), so looking them up in name_mapping -- whose keys are the
# raw labels -- never matched and no marker was ever added. Every marker was also
# pinned at [0, 0]; it is now placed on the country's centroid. Keys without a polygon
# in `world` (NaN, unknown names) are skipped.
for country, count in country_counts.items():
    centroid = country_centroids.get(country)
    if centroid is None:
        continue
    folium.Marker(
        location=[centroid.y, centroid.x],
        popup=f"{country}: {count}",
        tooltip=country,
    ).add_to(marker_cluster)


In [None]:
import matplotlib
# Column of sub_world that drives the colouring.
colname = 'color'

# Centre the map on the middle of the bounding box of the selected countries.
xmin, ymin, xmax, ymax = sub_world.total_bounds
centroidx = np.mean([xmin, xmax])
centroidy = np.mean([ymin, ymax])

map1 = folium.Map(
    location=[centroidy, centroidx],
    tiles='cartodbpositron',
    zoom_start=6,
)

# matplotlib.cm.get_cmap() is deprecated and removed in recent matplotlib releases;
# the colormap registry is the supported way to fetch a colormap by name.
cmap = matplotlib.colormaps['viridis']

# Symmetric-log normalisation spanning the observed counts.
vmin = sub_world[colname].min()
vmax = sub_world[colname].max()
norm = matplotlib.colors.SymLogNorm(vmin=vmin, vmax=vmax, linthresh=0.1)

def fetchHexFromValue(value):
    """Return the hex colour that ``cmap`` assigns to ``value`` once scaled by ``norm``."""
    return matplotlib.colors.to_hex(cmap(norm(value)))



# Drop a marker on the centroid of every selected country; the popup shows the row
# index and the value of the coloured column.
for idx, row in sub_world.iterrows():
    center = row["geometry"].centroid
    label = 'idx:{0} <br> {1}: {2}'.format(idx, colname, row[colname])
    folium.Marker(location=[center.y, center.x], popup=label).add_to(map1)

# Draw the countries on the same map, coloured by `colname`.
sub_world.explore(colname, cmap="viridis", m=map1)

map1

In [None]:
def get_color(x, vmin=1, vmax=236):
    """Map a count onto a black-to-red CSS colour string.

    Args:
        x: Value to colour.
        vmin: Value rendered as black. Defaults to 1, the lower bound that was
            previously hard-coded.
        vmax: Value rendered as full red. Defaults to 236, the upper bound that was
            previously hard-coded.

    Returns:
        A string of the form ``"rgba(<red>, 0, 0, 1)"`` with ``red`` in 0..255.
    """
    span = vmax - vmin
    # A degenerate range cannot be scaled; render it as full red rather than dividing by zero.
    ratio = 1.0 if span <= 0 else (x - vmin) / span

    # Clamp so values outside [vmin, vmax] cannot produce a channel below 0 or above 255.
    ratio = min(max(ratio, 0.0), 1.0)
    red = int(255 * ratio)

    # rgba() takes four components; the alpha channel was missing before.
    return f"rgba({red}, 0, 0, 1)"

In [None]:
m = folium.Map()
for _, row in sub_world.iterrows():
    # Without simplifying the geometry of each country,
    # the map might not be displayed
    simplified = gpd.GeoSeries(row["geometry"]).simplify(tolerance=0.001)
    fill = get_color(row['color'])
    layer = folium.GeoJson(
        data=simplified.to_json(),
        # Bind the colour as a default argument: a plain closure looks the variable up
        # when the map is rendered, i.e. after the loop has finished, so every country
        # would be drawn with the colour of the last row.
        style_function=lambda feature, fill=fill: {"fillColor": fill},
    )
    # Popup with the country name and its number of lots.
    folium.Popup('{0}: <br> Gemstones: {1}'.format(row['NAME'],
                                                   row['color'])).add_to(layer)
    layer.add_to(m)

In [None]:
# Render the map built in the previous cell.
m

In [None]:
# Create a folium map
m = folium.Map(location=[0, 0], zoom_start=2)

# Add GeoJson layer with custom styling.
# The fill used to be "rgba(<count>, 0, 0, 0)": an alpha of 0 makes it fully
# transparent, and the raw count was used directly as the red channel. The colour now
# comes from get_color(), like the per-country map above.
folium.GeoJson(
    data=sub_world,
    name='geojson',
    style_function=lambda feature: {"fillColor": get_color(feature['properties']['color'])}
).add_to(m)

In [None]:
# Render the map holding the GeoJson layer.
m

In [None]:
# Render the same map once more (identical to the previous cell).
m

# Drafts

https://onlineonly.christies.com/s/jewels-online-iconic-designs/graff-diamond-eternity-band-ring-19/102322?ldp_breadcrumb=back


In [None]:
# Rows of the aggregated table for sapphires.
# The index comes from groupby(['gemstone']), so the level is named 'gemstone';
# level='main_gemstone' does not exist and raised a KeyError.
grouped_time_carat_price.xs('sapphire', level='gemstone')