In [14]:
import pandas as pd 
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.preprocessing import MinMaxScaler
import numpy as np
# Load the similarity-score / yield table.
df = pd.read_csv('../../data/pecan/sim_score_and_yield_05_23_24.csv')

# Rename columns: raw CSV headers -> human-readable labels used on the plot axes.
display_names = {
    'city_names': 'City',
    'yield': 'Yield',
    'lat_sim': 'Latitude',
    'lon_sim': 'Longitude',
    'dew_point_sim': 'Dew Point',
    'feels_like_sim': 'Feels Like',
    'temp_min_sim': 'Min Temp',
    'temp_max_sim': 'Max Temp',
    'pressure_sim': 'Pressure',
    'humidity_sim': 'Humidity',
    'wind_speed_sim': 'Wind Speed',
    'wind_deg_sim': 'Wind Direction',
    'clouds_all_sim': 'Cloud Cover',
    'weather_id_sim': 'Weather ID',
    'Similarity Type': 'Similarity',
}
df = df.rename(columns=display_names)

# Hold the yield out of the feature frame so it is not rescaled with the
# other columns; it is concatenated back after normalization.
yield_df = df['Yield']
# 'Unnamed: 0' is presumably a saved index column from an earlier to_csv -- TODO confirm.
df = df.drop(columns=['Unnamed: 0', 'Yield'])

# Display the columns
print(df.columns)

# Replace inf values with NaN and drop NaNs (any row with a missing value is removed).
df = df.replace([np.inf, -np.inf], np.nan).dropna()

# Encode 'Similarity' as integer codes (one code per distinct label, in
# order of first appearance) so it can be treated as a numeric column.
similarity_codes = pd.factorize(df['Similarity'])[0]
df = df.assign(Similarity=similarity_codes)

# Encode 'City' the same way, but keep it as a separate Series aligned to the
# cleaned rows so it stays out of the feature frame that gets normalized.
codes, uniques = pd.factorize(df['City'])
city_df = pd.Series(codes, index=df.index, name='City')
df = df.drop(columns='City')

# Column dtypes before scaling.
print(df.dtypes)

# Normalize the data: min-max scale every numeric column of the feature frame
# (MinMaxScaler's default feature range is [0, 1]).
scaler = MinMaxScaler()
numerical_cols = df.select_dtypes(include=['number']).columns
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
# Column dtypes after scaling (the scaler returns floats for every column).
print(df.dtypes)

# Re-attach the unscaled City codes and the Yield column.
# `yield_df` was captured before the NaN/inf rows were dropped, so it still
# carries the labels of the removed rows. With concat's default outer join
# those rows would come back with NaN in every feature column, undoing the
# cleaning step. An inner join keeps only the rows present in all three.
df = pd.concat([df, city_df, yield_df], axis=1, join='inner')
# Display the shape of the dataframe
print(df.shape)

# Configure the dimensions for the parallel coordinates plot.
# Each entry becomes one axis, in the order listed here.
dimensions = [
    'Yield', 'City', 'Latitude', 'Longitude', 'Dew Point',
    'Feels Like', 'Min Temp', 'Max Temp',
    'Pressure', 'Humidity', 'Wind Speed', 'Wind Direction', 'Cloud Cover',
    'Weather ID',
]

# Create the parallel coordinates plot, colouring each line by its
# 'Similarity' value.
fig = px.parallel_coordinates(
    df,
    dimensions=dimensions,
    color='Similarity',
    title="Dimensionality Reduction from Similarity Scores",
    # NOTE(review): 'Similarity' is min-max scaled earlier in this cell, so
    # its values should lie in [0, 1]; a midpoint of 0 then presumably leaves
    # half of the diverging colour scale unused -- confirm whether 0.5 was intended.
    color_continuous_midpoint=0,
    color_continuous_scale=px.colors.diverging.Tealrose,
)

fig.update_layout(
    paper_bgcolor='#eceff4',
    width=1150,
    height=350,
    title_x=0.5,  # centre the title horizontally
)
# Export to the data directory is currently disabled:
# fig.write_html('../../data/pecan/parallel_coordinates/sim_reduction_graph.html')

fig.show()


Index(['City', 'Latitude', 'Longitude', 'Dew Point', 'Feels Like', 'Min Temp',
       'Max Temp', 'Pressure', 'Humidity', 'Wind Speed', 'Wind Direction',
       'Cloud Cover', 'Weather ID', 'Similarity'],
      dtype='object')
Latitude          float64
Longitude         float64
Dew Point         float64
Feels Like        float64
Min Temp          float64
Max Temp          float64
Pressure          float64
Humidity          float64
Wind Speed        float64
Wind Direction    float64
Cloud Cover       float64
Weather ID        float64
Similarity          int64
dtype: object
Latitude          float64
Longitude         float64
Dew Point         float64
Feels Like        float64
Min Temp          float64
Max Temp          float64
Pressure          float64
Humidity          float64
Wind Speed        float64
Wind Direction    float64
Cloud Cover       float64
Weather ID        float64
Similarity        float64
dtype: object
(462, 15)


In [16]:
# Save the figure built above as an HTML file; the path is relative, so it
# lands in the notebook's current working directory.
fig.write_html(file='trash.html')