#### *VEERA PAVAN KUMAR SEERAPU*

#### *23285281*

#### *ANALYTICS PROGRAMMING AND DATA VISUALIZATION*

##### *Import necessary libraries*

In [1]:
import json

from pymongo import MongoClient

import pandas as pd

from sqlalchemy import create_engine

import numpy as np

from scipy.stats import ttest_ind, pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report, roc_curve, roc_auc_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures

from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

import plotly.express as px
import plotly.graph_objects as go

from dash import Dash, dcc, html, Input, Output

import warnings
warnings.filterwarnings('ignore')

##### *Store Raw Data in MongoDB*

In [2]:
# MongoDB connection
client = MongoClient('mongodb://localhost:27017/')
db = client['temperature_data_db']
collection = db['raw_temperatures']

# Load JSON data
with open('Global_Temperatures.json', 'r') as file:
    data = json.load(file)

# Insert data into MongoDB
collection.drop() 
collection.insert_many(data)
print(f"Inserted {collection.count_documents({})} records into MongoDB.")

Inserted 3064 records into MongoDB.


##### *ETL Process*

In [3]:
# Extract from MongoDB
raw_data = list(collection.find())
df = pd.json_normalize(raw_data)

In [14]:
# Transform with Pandas
print(df.columns)

Index(['region', 'country', 'state', 'city', 'year', 'temperature'], dtype='object')


In [5]:
# Initial data exploration
print("Initial Data Info:")
print(df.info())

Initial Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3064 entries, 0 to 3063
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   region       3064 non-null   object 
 1   country      3064 non-null   object 
 2   state        3064 non-null   object 
 3   city         3064 non-null   object 
 4   year         3064 non-null   int64  
 5   temperature  3064 non-null   float64
dtypes: float64(1), int64(1), object(4)
memory usage: 143.8+ KB
None


In [6]:
print("Initial Null Values:")
print(df.isnull().sum())

Initial Null Values:
region         0
country        0
state          0
city           0
year           0
temperature    0
dtype: int64


In [7]:
# Handle non-finite values
df = df[df['temperature'].notna() & ~np.isinf(df['temperature'])]

In [8]:
# Data type conversion
df['year'] = pd.to_numeric(df['year'], errors='coerce').astype('Int64')
df['temperature'] = df['temperature'].astype(float)

In [9]:
# Handle remaining nulls
df['state'] = df['state'].fillna('')
df['city'] = df['city'].fillna('')
df = df.dropna(subset=['year', 'temperature'])

In [10]:
# Outlier detection (using IQR)
Q1 = df['temperature'].quantile(0.25)
Q3 = df['temperature'].quantile(0.75)
IQR = Q3 - Q1
df = df[(df['temperature'] >= (Q1 - 1.5 * IQR)) & (df['temperature'] <= (Q3 + 1.5 * IQR))]

In [11]:
print("\nData Info After Cleaning:")
print(df.info())


Data Info After Cleaning:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3064 entries, 0 to 3063
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   region       3064 non-null   object 
 1   country      3064 non-null   object 
 2   state        3064 non-null   object 
 3   city         3064 non-null   object 
 4   year         3064 non-null   Int64  
 5   temperature  3064 non-null   float64
dtypes: Int64(1), float64(1), object(4)
memory usage: 146.7+ KB
None


In [12]:
print("\nNull Values After Cleaning:")
print(df.isnull().sum())


Null Values After Cleaning:
region         0
country        0
state          0
city           0
year           0
temperature    0
dtype: int64


In [13]:
print("\nDescriptive Statistics:")
print(df.describe())


Descriptive Statistics:
              year  temperature
count       3064.0  3064.000000
mean   2007.219974    65.882901
std       7.439685    13.572005
min         1995.0    17.421642
25%         2001.0    53.831172
50%         2007.0    66.534751
75%         2014.0    79.474858
max         2020.0    90.750000


##### *Feature engineering for to Calculate the temperature change*

In [14]:
df = df.sort_values(['country', 'city', 'year'])
df['temp_change'] = df.groupby(['country', 'city'])['temperature'].pct_change() * 100
df['temp_change'] = df['temp_change'].fillna(0)

In [15]:
# Create binary target for Logistic Regression
median_temp = df['temperature'].median()
df['high_temp'] = (df['temperature'] > median_temp).astype(int)

##### *Load into PostgreSQL*

In [16]:
DB_NAME = "temperature_data_db"
USER = "postgres"
PASSWORD = "Pavanpostgre"  
HOST = "localhost"
PORT = "5432"
engine = create_engine(f'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DB_NAME}')
df.to_sql('processed_temperatures', engine, if_exists='replace', index=False)
print("Processed data loaded into PostgreSQL.")

Processed data loaded into PostgreSQL.


##### *Advanced Statistical and Machine Learning Analysis*

In [17]:
df = pd.read_sql('SELECT * FROM processed_temperatures', engine)

##### *# Statistical Analysis: T-test for Africa vs. Europe temperatures in recent year*

In [18]:
recent_year = df['year'].max()
africa = df[(df['region'] == 'Africa') & (df['year'] == recent_year)]['temperature'].dropna()
europe = df[(df['region'] == 'Europe') & (df['year'] == recent_year)]['temperature'].dropna()

print(f"Africa {recent_year} data points: {len(africa)}")
print(f"Europe {recent_year} data points: {len(europe)}")

if len(africa) > 1 and len(europe) > 1:
    t_stat, p_value = ttest_ind(africa, europe, equal_var=False)
    print(f"T-test (Africa vs Europe {recent_year}): t-stat={t_stat:.2f}, p-value={p_value:.4f}")
else:
    print(f"T-test skipped: Not enough data points for Africa or Europe in {recent_year}.")

Africa 2020 data points: 23
Europe 2020 data points: 30
T-test (Africa vs Europe 2020): t-stat=14.16, p-value=0.0000


##### *Correlation for the Year vs Temperature for Brazil*

In [19]:
bra = df[df['country'] == 'Brazil'].copy()
bra['year'] = pd.to_numeric(bra['year'], errors='coerce')
bra['temperature'] = pd.to_numeric(bra['temperature'], errors='coerce')
bra = bra.dropna(subset=['year', 'temperature'])
if len(bra) > 1:
    corr, p_val = pearsonr(bra['year'], bra['temperature'])
    print(f"Correlation (Brazil Year vs Temperature): {corr:.4f}, p-value={p_val:.4f}")
else:
    print("Correlation skipped: Not enough data for Brazil.")

Correlation (Brazil Year vs Temperature): 0.3527, p-value=0.0772


##### *Machine Learning Models*

In [20]:
# Create df_ml and ensure no missing values in key columns
df_ml = df.dropna(subset=['year', 'temp_change', 'temperature', 'high_temp', 'region', 'country', 'city'])

In [21]:
# Feature Engineering: Add temp_lag1
df_ml['temp_lag1'] = df_ml.groupby(['country', 'city'])['temperature'].shift(1)
df_ml = df_ml.dropna(subset=['temp_lag1'])  

In [22]:
# Encode the 'region' column
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_regions = encoder.fit_transform(df_ml[['region']])
encoded_regions_df = pd.DataFrame(encoded_regions, columns=encoder.get_feature_names_out(['region']))

In [23]:
# Combine encoded regions with df_ml
df_ml = pd.concat([df_ml.reset_index(drop=True), encoded_regions_df], axis=1)

In [24]:
# Define features (X) and target (y_reg) for regression
X = df_ml[['year', 'temp_change', 'temp_lag1'] + list(encoder.get_feature_names_out(['region']))]
y_reg = df_ml['temperature'] 

# Split data
X_train, X_test, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Linear Regression
mlr = LinearRegression()
mlr.fit(X_train_scaled, y_train_reg)
y_pred_mlr = mlr.predict(X_test_scaled)
mlr_r2 = r2_score(y_test_reg, y_pred_mlr)
mlr_mse = mean_squared_error(y_test_reg, y_pred_mlr)
print(f"Linear Regression: R2={mlr_r2:.4f}, MSE={mlr_mse:.4f}")

Linear Regression: R2=0.9991, MSE=0.1683


##### *Multiple Linear Regression*

In [25]:
# Train Linear Regression
mlr = LinearRegression()
mlr.fit(X_train_scaled, y_train_reg)
y_pred_mlr = mlr.predict(X_test_scaled)
mlr_r2 = r2_score(y_test_reg, y_pred_mlr)
mlr_mse = mean_squared_error(y_test_reg, y_pred_mlr)
print(f"Multiple Linear Regression: R2={mlr_r2:.4f}, MSE={mlr_mse:.4f}")

Multiple Linear Regression: R2=0.9991, MSE=0.1683


##### *Random Forest Classifier*

In [26]:
# Define features (X) and target (y_class) for classification
X = df_ml[['year', 'temp_change', 'temp_lag1'] + list(encoder.get_feature_names_out(['region']))]
y_class = df_ml['high_temp']  

# Split data
X_train, X_test, y_train_class, y_test_class = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_train_scaled, y_train_class = smote.fit_resample(X_train_scaled, y_train_class)

# Train Random Forest Classifier
rf_class = RandomForestClassifier(n_estimators=100, random_state=42)
rf_class.fit(X_train_scaled, y_train_class)

# Predict and evaluate
y_pred_rf = rf_class.predict(X_test_scaled)
print(f"Random Forest Accuracy: {accuracy_score(y_test_class, y_pred_rf):.4f}")
print("\nRandom Forest Classification Report:")
print(classification_report(y_test_class, y_pred_rf))

Random Forest Accuracy: 0.9881

Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.99      0.99      0.99       287

    accuracy                           0.99       587
   macro avg       0.99      0.99      0.99       587
weighted avg       0.99      0.99      0.99       587



##### *Random Forest Regressor*

In [27]:
# Define features (X) and target (y_reg) for regression
X = df_ml[['year', 'temp_change', 'temp_lag1'] + list(encoder.get_feature_names_out(['region']))]
y_reg = df_ml['temperature']  

# Split data
X_train, X_test, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest Regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train_reg)
y_pred_rf = rf.predict(X_test_scaled)
rf_r2 = r2_score(y_test_reg, y_pred_rf)
rf_mse = mean_squared_error(y_test_reg, y_pred_rf)
print(f"Random Forest Regressor: R2={rf_r2:.4f}, MSE={rf_mse:.4f}")

Random Forest Regressor: R2=0.9985, MSE=0.2642


##### *K-Nearest Neighbors Regressor*

In [28]:
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train_scaled, y_train_reg)
y_pred_knn = knn.predict(X_test_scaled)
knn_r2 = r2_score(y_test_reg, y_pred_knn)
knn_mse = mean_squared_error(y_test_reg, y_pred_knn)
print(f"KNN Regressor: R2={knn_r2:.4f}, MSE={knn_mse:.4f}")

KNN Regressor: R2=0.9818, MSE=3.3071


##### *Logistic Regression*

In [29]:
# Define features (X) and target (y_class) for classification
X = df_ml[['year', 'temp_change', 'temp_lag1'] + list(encoder.get_feature_names_out(['region']))]
y_class = df_ml['high_temp'] 

# Split data
X_train, X_test, y_train_class, y_test_class = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply SMOTE to handle class imbalance
smote = SMOTE(random_state=42)
X_train_scaled, y_train_class = smote.fit_resample(X_train_scaled, y_train_class)
param_grid = {'C': [0.01, 0.1, 1, 10], 'solver': ['lbfgs', 'liblinear']}
grid_search = GridSearchCV(LogisticRegression(random_state=42, max_iter=1000), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train_class)

# Get the best estimator
log_reg = grid_search.best_estimator_

# Predict and evaluate
y_pred_log_reg = log_reg.predict(X_test_scaled)
print(f"Tuned Logistic Regression Accuracy: {accuracy_score(y_test_class, y_pred_log_reg):.4f}")
print("\nTuned Logistic Regression Classification Report:")
print(classification_report(y_test_class, y_pred_log_reg))
print(f"Best Parameters: {grid_search.best_params_}")

Tuned Logistic Regression Accuracy: 0.9932

Tuned Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       300
           1       0.99      0.99      0.99       287

    accuracy                           0.99       587
   macro avg       0.99      0.99      0.99       587
weighted avg       0.99      0.99      0.99       587

Best Parameters: {'C': 10, 'solver': 'lbfgs'}


##### *Gradient Descent for Brazil*

In [30]:
def gradient_descent(X, y, learning_rate=0.0001, epochs=10000):
    X = (X - X.mean()) / X.std()
    m, b = 0, 0
    n = len(X)
    for _ in range(epochs):
        y_pred = m * X + b
        dm = -(2/n) * sum(X * (y - y_pred))
        db = -(2/n) * sum(y - y_pred)
        m -= learning_rate * dm
        b -= learning_rate * db
    return m, b

bra = df[df['country'] == 'Brazil'].dropna(subset=['year', 'temperature'])
if len(bra) > 1:
    X_gd = bra['year'].values
    y_gd = bra['temperature'].values
    m, b = gradient_descent(X_gd, y_gd)
    X_gd_scaled = (X_gd - X_gd.mean()) / X_gd.std()
    y_pred_gd = m * X_gd_scaled + b
    gd_rmse = np.sqrt(mean_squared_error(y_gd, y_pred_gd))
    print(f"Gradient Descent (Brazil): Slope={m:.2f}, Intercept={b:.2f}, RMSE={gd_rmse:.2f}")
else:
    print("Gradient Descent skipped: Not enough data for Brazil.")

Gradient Descent (Brazil): Slope=0.24, Intercept=62.28, RMSE=9.77


#### *Advanced Data Visualization (Interactive Dashboard)*

In [31]:
df = pd.read_sql('SELECT * FROM processed_temperatures', engine)

# Define color scheme
colors = {
    'background': '#1E1E2E',
    'card': '#2A2A3C',
    'text': '#D1D1D6',
    'primary': '#6E56CF',
    'secondary': '#FF6B6B',
}

In [32]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [33]:
pip install dash-bootstrap-components

Note: you may need to restart the kernel to use updated packages.


In [34]:
pip install dash-iconify

Note: you may need to restart the kernel to use updated packages.


In [15]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
from dash_iconify import DashIconify
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score
from sqlalchemy import create_engine

# Database connection
DB_NAME = "temperature_data_db"
USER = "postgres"
PASSWORD = "Pavanpostgre"
HOST = "localhost"
PORT = "5432"
try:
    engine = create_engine(f'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{DB_NAME}')
    df = pd.read_sql('SELECT * FROM processed_temperatures', engine)
except Exception as e:
    print(f"Database connection failed: {e}")
    df = pd.DataFrame()  # Fallback to empty DataFrame

# Colors dictionary
colors = {
    'background': '#1E1E2E',
    'card': '#2A2A3C',
    'text': '#D1D1D6',
    'primary': '#6E56CF',
    'secondary': '#FF6B6B'
}

# Generate ROC Curve
if not df.empty:
    df_ml = df.dropna(subset=['year', 'temp_change', 'temperature', 'high_temp'])
    X = df_ml[['year', 'temp_change']]
    y_class = df_ml['high_temp']
    X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X, y_class, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_class_scaled = scaler.fit_transform(X_train_class)
    X_test_class_scaled = scaler.transform(X_test_class)
    log_reg = LogisticRegression(random_state=42)
    log_reg.fit(X_train_class_scaled, y_train_class)
    y_pred_proba_log_reg = log_reg.predict_proba(X_test_class_scaled)[:, 1]
    fpr, tpr, _ = roc_curve(y_test_class, y_pred_proba_log_reg)
    log_reg_roc_auc = roc_auc_score(y_test_class, y_pred_proba_log_reg)
else:
    fpr, tpr, log_reg_roc_auc = [0, 1], [0, 1], 0.5

fig_roc = go.Figure()
fig_roc.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC Curve (AUC = {log_reg_roc_auc:.2f})',
                             line=dict(color=colors['primary'])))
fig_roc.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random Guess',
                             line=dict(color=colors['secondary'], dash='dash')))
fig_roc.update_layout(
    title='ROC Curve for Logistic Regression',
    plot_bgcolor=colors['card'],
    paper_bgcolor=colors['card'],
    font_color=colors['text'],
    title_font_size=20,
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
    legend_title="Legend",
    template='plotly_dark'
)

# App setup with Bootstrap DARKLY theme
app = Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])

# Define custom Plotly template
custom_template = dict(
    layout=go.Layout(
        plot_bgcolor=colors['card'],
        paper_bgcolor=colors['card'],
        font=dict(color=colors['text'], family='Roboto, sans-serif'),
        title_font=dict(size=20, color=colors['text']),
        xaxis=dict(gridcolor='rgba(255,255,255,0.1)', title_font=dict(size=16)),
        yaxis=dict(gridcolor='rgba(255,255,255,0.1)', title_font=dict(size=16))
    )
)

# Helper function to create cards
def create_card(title, dropdown_id, options, value, multi=False, icon="mdi:thermometer"):
    return html.Div([
        html.H3([
            DashIconify(icon=icon, style={'marginRight': '10px'}),
            title
        ], style={
            'textAlign': 'center',
            'color': colors['text'],
            'marginBottom': '15px',
            'fontFamily': 'Roboto, sans-serif',
            'fontSize': '20px'
        }),
        dcc.Dropdown(
            id=dropdown_id,
            options=options,
            value=value,
            multi=multi,
            style={
                'width': '60%',
                'margin': '0 auto',
                'backgroundColor': colors['card'],
                'color': colors['text'],  # Improved contrast
                'borderRadius': '5px',
                'fontSize': '16px'
            }
        ),
        dbc.Tooltip(f"Select {'multiple' if multi else 'a'} {title.lower().split(' for ')[1]}", target=dropdown_id)
    ], className='card', style={
        'backgroundColor': colors['card'],
        'padding': '25px',
        'borderRadius': '15px',
        'boxShadow': '0 6px 18px rgba(0,0,0,0.4)',
        'margin': '20px',
        'transition': 'transform 0.2s, box-shadow 0.2s'
    })

# Layout
app.layout = dbc.Container([
    # Header
    dbc.Row([
        dbc.Col(html.H1("Global Temperature Analytics Dashboard", style={
            'textAlign': 'center',
            'color': colors['text'],
            'fontFamily': 'Roboto, sans-serif',
            'fontSize': '32px',
            'fontWeight': 'bold',
            'backgroundColor': colors['card'],
            'padding': '20px',
            'borderRadius': '15px',
            'boxShadow': '0 6px 18px rgba(0,0,0,0.4)',
            'margin': '20px'
        }), width=12)
    ]),
    # Year Range Slider
    dbc.Row([
        dbc.Col([
            html.H3("Filter by Year Range", style={
                'textAlign': 'center',
                'color': colors['text'],
                'fontFamily': 'Roboto, sans-serif'
            }),
            dcc.RangeSlider(
                id='year-slider',
                min=df['year'].min() if not df.empty else 2000,
                max=df['year'].max() if not df.empty else 2020,
                value=[df['year'].min(), df['year'].max()] if not df.empty else [2000, 2020],
                marks={str(year): str(year) for year in df['year'].unique()} if not df.empty else {str(i): str(i) for i in range(2000, 2021, 5)},
                step=None,
                className='mb-4'
            ),
            dbc.Tooltip("Select a year range to filter all visualizations", target='year-slider')
        ], width=12, className='card', style={
            'backgroundColor': colors['card'],
            'padding': '25px',
            'borderRadius': '15px',
            'boxShadow': '0 6px 18px rgba(0,0,0,0.4)',
            'margin': '20px'
        })
    ]),
    # Download Button
    dbc.Row([
        dbc.Col([
            html.Button("Download Data as CSV", id="download-btn", className='btn btn-primary'),
            dcc.Download(id="download-data")
        ], width=12, style={'textAlign': 'center', 'margin': '20px'})
    ]),
    # Accordion for Visualizations
    dbc.Accordion([
        # Line Plot Section
        dbc.AccordionItem([
            create_card("Select Countries for Temperature Trends", "country-line-dropdown",
                        [{'label': c, 'value': c} for c in df['country'].unique()] if not df.empty else [{'label': 'No Data', 'value': 'No Data'}],
                        df['country'].unique()[:3].tolist() if not df.empty else ['No Data'], multi=True),
            dcc.Loading(dcc.Graph(id='line-plot', config={'displayModeBar': False}, style={'borderRadius': '10px'}))
        ], title="Temperature Trends"),
        # Bar Plot Section
        dbc.AccordionItem([
            create_card("Select Year for Temperature Comparison", "year-bar-dropdown",
                        [{'label': y, 'value': y} for y in sorted(df['year'].unique())] if not df.empty else [{'label': '2020', 'value': 2020}],
                        df['year'].max() if not df.empty else 2020),
            dcc.Loading(dcc.Graph(id='bar-plot', config={'displayModeBar': False}, style={'borderRadius': '10px'}))
        ], title="Temperature Comparison"),
        # Scatter and Box Plot Section
        dbc.AccordionItem([
            dbc.Row([
                dbc.Col([
                    create_card("Select Country for Temperature with Change Rate", "country-scatter-dropdown",
                                [{'label': c, 'value': c} for c in df['country'].unique()] if not df.empty else [{'label': 'No Data', 'value': 'No Data'}],
                                'Brazil' if not df.empty and 'Brazil' in df['country'].unique() else 'No Data')
                ], width=6),
                dbc.Col([
                    create_card("Select Countries for Temperature Change Distribution", "country-box-dropdown",
                                [{'label': c, 'value': c} for c in df['country'].unique()] if not df.empty else [{'label': 'No Data', 'value': 'No Data'}],
                                df['country'].unique()[:5].tolist() if not df.empty else ['No Data'], multi=True)
                ], width=6)
            ]),
            dbc.Row([
                dbc.Col(dcc.Loading(dcc.Graph(id='scatter-plot', style={'borderRadius': '10px'})), width=6),
                dbc.Col(dcc.Loading(dcc.Graph(id='box-plot', style={'borderRadius': '10px'})), width=6)
            ])
        ], title="Temperature Change Analysis"),
        # Heatmap Section
        dbc.AccordionItem([
            create_card("Select Regions for Average Temperature with Year", "region-heatmap-dropdown",
                        [{'label': r, 'value': r} for r in df['region'].unique()] if not df.empty else [{'label': 'No Data', 'value': 'No Data'}],
                        df['region'].unique().tolist() if not df.empty else ['No Data'], multi=True),
            dcc.Loading(dcc.Graph(id='heatmap-plot', style={'borderRadius': '10px'}))
        ], title="Regional Temperature Heatmap"),
        # Map Section
        dbc.AccordionItem([
            create_card("Select Year for Global Temperature Map", "year-map-dropdown",
                        [{'label': y, 'value': y} for y in sorted(df['year'].unique())] if not df.empty else [{'label': '2020', 'value': 2020}],
                        df['year'].max() if not df.empty else 2020),
            dcc.Loading(dcc.Graph(id='map-plot', style={'borderRadius': '10px'}))
        ], title="Global Temperature Map"),
        # ROC Curve Section
        dbc.AccordionItem([
            dcc.Graph(id='roc-plot', figure=fig_roc, style={'borderRadius': '10px'})
        ], title="ROC Curve for Logistic Regression")
    ], style={'backgroundColor': colors['card'], 'borderRadius': '15px', 'margin': '20px'})
], fluid=True, style={
    'background': 'linear-gradient(180deg, #1E1E2E 0%, #2A2A3C 100%)',
    'fontFamily': 'Roboto, sans-serif',
    'padding': '20px'
})

# Client-side callback for dropdown CSS and card hover effects
app.clientside_callback(
    """
    function() {
        const style = document.createElement('style');
        style.type = 'text/css';
        style.innerHTML = `
            .Select-menu-outer {
                background-color: #2A2A3C !important;
            }
            .Select-option {
                color: #D1D1D6 !important;
                background-color: #2A2A3C !important;
            }
            .Select-option:hover {
                background-color: #6E56CF !important;
                color: white !important;
            }
            .Select-control {
                background-color: #2A2A3C !important;
                color: #D1D1D6 !important;
            }
            .Select-value-label {
                color: #D1D1D6 !important;
            }
            .Select-input > input {
                color: #D1D1D6 !important;
            }
            .card:hover {
                transform: scale(1.02);
                box-shadow: 0 8px 24px rgba(0,0,0,0.5);
            }
        `;
        document.head.appendChild(style);
        return null;
    }
    """,
    Output('line-plot', 'id'),
    Input('line-plot', 'id')
)

# Callback for Download
@app.callback(
    Output("download-data", "data"),
    Input("download-btn", "n_clicks"),
    prevent_initial_call=True
)
def download_data(n_clicks):
    if df.empty:
        return None
    return dcc.send_data_frame(df.to_csv, "temperature_data.csv")

# Callback for Line Plot
@app.callback(
    Output('line-plot', 'figure'),
    [Input('country-line-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_line_plot(selected_countries, year_range):
    if not selected_countries or df.empty or 'No Data' in selected_countries:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Temperature Trends by Country',
            xaxis_title="Year",
            yaxis_title="Temperature (°F)",
            legend_title="Country",
            hovermode='closest',
            template=custom_template,
            showlegend=True,
            margin=dict(l=50, r=50, t=50, b=50),
            height=500,
            dragmode='zoom',
            xaxis_rangeslider_visible=True
        )
        return fig

    filtered_df = df[df['country'].isin(selected_countries) & (df['year'].between(year_range[0], year_range[1]))]
    fig = go.Figure()
    for country in selected_countries:
        country_df = filtered_df[filtered_df['country'] == country]
        if not country_df.empty:
            fig.add_trace(go.Scatter(
                x=country_df['year'],
                y=country_df['temperature'],
                mode='lines+markers',
                name=country,
                hovertemplate='<b>%{text}</b><br>Year: %{x}<br>Temperature: %{y:.2f}°F<br>Temp Change: %{customdata:.2f}%',
                text=[country] * len(country_df),
                customdata=country_df['temp_change'],
                line=dict(width=2),
                marker=dict(size=6)
            ))
            # Add annotation for peak temperature
            max_temp = country_df['temperature'].max()
            max_year = country_df[country_df['temperature'] == max_temp]['year'].iloc[0]
            fig.add_annotation(
                x=max_year, y=max_temp,
                text=f"Peak: {max_temp:.1f}°F",
                showarrow=True,
                arrowhead=2,
                ax=20, ay=-30,
                font=dict(color=colors['text'], size=12)
            )

    fig.update_layout(
        title='Temperature Trends by Country',
        xaxis_title="Year",
        yaxis_title="Temperature (°F)",
        legend_title="Country",
        hovermode='closest',
        template=custom_template,
        showlegend=True,
        margin=dict(l=50, r=50, t=50, b=50),
        height=500,
        dragmode='zoom',
        xaxis_rangeslider_visible=True
    )
    return fig

# Callback for Bar Plot
@app.callback(
    Output('bar-plot', 'figure'),
    [Input('year-bar-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_bar_plot(selected_year, year_range):
    if df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Temperature Comparison',
            xaxis_title="Country",
            yaxis_title="Temperature (°F)",
            template=custom_template,
            height=500
        )
        return fig

    if selected_year not in range(year_range[0], year_range[1] + 1):
        selected_year = year_range[1]
    filtered_df = df[df['year'] == selected_year]
    fig = px.bar(filtered_df, x='country', y='temperature',
                 title=f'Temperature Comparison in {selected_year}',
                 text=filtered_df['temperature'].apply(lambda x: f'{x:.2f}'))
    fig.update_traces(marker_color=colors['primary'], textposition='auto')
    fig.update_layout(
        xaxis_title="Country",
        yaxis_title="Temperature (°F)",
        showlegend=False,
        template=custom_template,
        height=500
    )
    return fig

# Callback for Scatter Plot
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('country-scatter-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_scatter_plot(selected_country, year_range):
    if df.empty or selected_country == 'No Data':
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Temperature with Change Rate',
            xaxis_title="Year",
            yaxis_title="Temperature (°F)",
            template=custom_template,
            height=500
        )
        return fig

    filtered_df = df[(df['country'] == selected_country) & (df['year'].between(year_range[0], year_range[1]))]
    fig = px.scatter(filtered_df, x='year', y='temperature',
                     size=filtered_df['temp_change'].abs(),
                     color=filtered_df['temp_change'].apply(lambda x: 'Positive' if x > 0 else 'Negative'),
                     title=f'{selected_country} Temperature with Change Rate',
                     trendline='ols',
                     color_discrete_map={'Positive': colors['primary'], 'Negative': colors['secondary']})
    fig.update_layout(
        xaxis_title="Year",
        yaxis_title="Temperature (°F)",
        legend_title="Change Direction",
        template=custom_template,
        height=500,
        dragmode='zoom',
        xaxis_rangeslider_visible=True
    )
    return fig

# Callback for Box Plot
@app.callback(
    Output('box-plot', 'figure'),
    [Input('country-box-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_box_plot(selected_countries, year_range):
    if df.empty or not selected_countries or 'No Data' in selected_countries:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Temperature Change Distribution',
            xaxis_title="Country",
            yaxis_title="Temperature Change (%)",
            template=custom_template,
            height=500
        )
        return fig

    if not selected_countries:
        selected_countries = df['country'].unique()[:5].tolist()
    filtered_df = df[df['country'].isin(selected_countries) & (df['year'].between(year_range[0], year_range[1]))]
    fig = px.box(filtered_df, x='country', y='temp_change',
                 title='Temperature Change Distribution by Country')
    fig.update_traces(marker_color=colors['secondary'])
    fig.update_layout(
        xaxis_title="Country",
        yaxis_title="Temperature Change (%)",
        showlegend=False,
        template=custom_template,
        height=500
    )
    return fig

# Callback for Heatmap
@app.callback(
    Output('heatmap-plot', 'figure'),
    [Input('region-heatmap-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_heatmap(selected_regions, year_range):
    if df.empty or not selected_regions or 'No Data' in selected_regions:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Average Temperature by Region and Year',
            xaxis_title="Year",
            yaxis_title="Region",
            template=custom_template,
            height=500
        )
        return fig

    if not selected_regions:
        selected_regions = df['region'].unique().tolist()
    filtered_df = df[df['region'].isin(selected_regions) & (df['year'].between(year_range[0], year_range[1]))]
    pivot_df = filtered_df.pivot_table(values='temperature', index='region', columns='year', aggfunc='mean')
    fig = go.Figure(data=go.Heatmap(
        z=pivot_df.values,
        x=pivot_df.columns,
        y=pivot_df.index,
        colorscale='Viridis',
        showscale=True))
    fig.update_layout(
        title='Average Temperature by Region and Year',
        xaxis_title="Year",
        yaxis_title="Region",
        template=custom_template,
        height=500
    )
    return fig

# Callback for Map
@app.callback(
    Output('map-plot', 'figure'),
    [Input('year-map-dropdown', 'value'), Input('year-slider', 'value')]
)
def update_map_plot(selected_year, year_range):
    if df.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=20, color=colors['text'])
        )
        fig.update_layout(
            title='Global Temperature Map',
            template=custom_template,
            height=500
        )
        return fig

    if selected_year not in range(year_range[0], year_range[1] + 1):
        selected_year = year_range[1]
    filtered_df = df[df['year'] == selected_year]
    fig = px.choropleth(filtered_df, locations='country', locationmode='country names',
                        color='temperature', hover_name='country',
                        title=f'Global Temperature Map for {selected_year}',
                        color_continuous_scale=px.colors.sequential.Plasma)
    fig.update_layout(
        geo=dict(bgcolor=colors['card'], showframe=False),
        template=custom_template,
        height=500
    )
    return fig

# Run app
if __name__ == '__main__':
    app.run(debug=True, port=8050)