In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# CONFIGURATION
# District slugs. Each slug is interpolated into the scorecard URL's `district`
# query parameter by scrape_all(), and is title-cased (with '-' replaced by a
# space) to build the "District" column of the output table.
DISTRICTS = [
    'bagalkot', 'ballari', 'belagavi', 'bengaluru-rural', 'bengaluru-urban', 
    'bidar', 'chamarajanagar', 'chikballapur', 'chikkamagaluru', 'chitradurga', 
    'dakshina-kannada', 'davangere', 'dharwad', 'gadag', 'hassan', 'haveri', 
    'kalaburagi', 'kodagu', 'kolar', 'koppal', 'mandya', 'mysuru', 'raichur', 
    'ramanagara', 'shivamogga', 'tumakuru', 'udupi', 'uttara-kannada', 
    'vijayanagara', 'vijayapura', 'yadgir'
]

# Visible labels of the individual metrics looked up on each district page.
# Every label is passed to get_value() and becomes one column of the output CSV,
# so the strings must match the page text exactly.
METRICS_H3 = [
    "Total Population", "Total Youth Population", "Savings per Working Age Individual",
    "Mudra Loan to Labor Force Ratio", "CSR Spending per Capita (Rs.)", "CSR Share (% of State)",
    "GDP Growth Rate (%)", "Per Capita Income (Rs.)", "Trains per Week per 1000 sqkm",
    "MSMEs per 10k Population", "EPFO Firms", "Number of Jobs", "Employment in all MSMEs",
    "Registered Unorganised Workers", "Labor force participation (%)", "Unemployment rate (%)",
    "EPFO Coverage Rate (% of the Labor Force)", "Women Hostels", "Schools", "Enrollment Ratio",
    "GER, Higher Education", "Test scores (%)", "Number of Colleges", "Libraries",
    "ITI Seats per 1 lac Youth", "Seats in Top 3 Trades", "Sanctioned Trainers",
    "PMKVY Enrollment per 1 Lac Youth", "PMKVY Enrollments in Top 3 Jobs (%)",
    "Apprentices per 1 Lac Youth", "Enrolment (6-12 average)", "ITI Certified Trainers",
    "PMKVY Trainees Certified %", "Learning Outcomes (average)", "ITI Seats Vacancy %",
    "ITI Trainer Vacancy %"
]

# Labels of the headline score and the four category scores. get_value() tries
# its score-specific XPaths first for any label listed here.
SCORES = [
    "Y-Power Score", "Opportunity", "Workforce", "Education", "Readiness and Skills"
]

def get_driver():
    """Create the Chrome WebDriver used by the scraper.

    The browser runs headless with a fixed desktop user-agent and a tall
    1920x3000 window; the matching chromedriver binary is resolved through
    ``ChromeDriverManager``.

    Returns:
        A ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it.
    """
    browser_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "--window-size=1920,3000",
    )

    opts = Options()
    for flag in browser_flags:
        opts.add_argument(flag)

    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=opts,
    )

def _xpath_literal(text):
    """Quote ``text`` as an XPath 1.0 string literal, whatever quotes it contains."""
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape sequences, so a string holding both quote kinds
    # has to be assembled with concat().
    pieces = ", \"'\", ".join(f"'{part}'" for part in text.split("'"))
    return f"concat({pieces})"


def _element_text(driver, xpath):
    """Return the stripped text of the first element matching ``xpath``, or "" on failure."""
    try:
        return driver.find_element(By.XPATH, xpath).text.strip()
    except Exception:
        # Best-effort lookup: a missing or stale element just means "try the
        # next XPath". Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate so a long scrape can still be cancelled.
        return ""


def get_value(driver, label):
    """Read the value displayed next to ``label`` on the currently loaded page.

    Labels listed in ``SCORES`` are first looked up as "label followed by a
    span"; a candidate is accepted only if, after dropping anything from the
    first "/" onwards, it is a plain non-negative number. If that fails, or
    for any other label, the metric-card XPaths are tried in order and the
    first non-empty text is returned with any trailing "Avg: ..." part removed.

    Args:
        driver: Selenium WebDriver positioned on a scorecard page.
        label: Visible label text of the score or metric.

    Returns:
        The value as a string, or "N/A" when no candidate element yields text.
    """
    quoted = _xpath_literal(label)
    exact = f"//*[text()={quoted}]"
    partial = f"//*[contains(text(), {quoted})]"
    value_div = "div[contains(@class, 'font-bold') or contains(@class, 'text-2xl')][1]"

    # Categorical scores (Y-Power Score etc.): the value sits in the next span.
    if label in SCORES:
        for anchor in (exact, partial):
            val = _element_text(driver, f"{anchor}/following::span[1]")
            # "i" is the text of the info icon that can follow a label.
            if not val or val == "i":
                continue
            if "/" in val:
                val = val.split("/")[0].strip()
            if val.isdigit() or val.replace('.', '', 1).isdigit():
                return val

    # Grid metrics: the value is a bold / large div after the label, or the
    # sibling div of the label's container.
    metric_xpaths = (
        f"{exact}/following::{value_div}",
        f"{partial}/following::{value_div}",
        f"{exact}/ancestor::div[1]/following-sibling::div[1]",
    )
    for xpath in metric_xpaths:
        val = _element_text(driver, xpath)
        if val and val != "i":
            if "Avg:" in val:
                val = val.split("Avg:")[0].strip()
            return val

    return "N/A"

def scrape_all():
    """Scrape every district in ``DISTRICTS`` and write the results to CSV.

    For each district the scorecard page is loaded, the function waits for the
    "Y-Power Score" text to appear, then reads every label in ``SCORES`` and
    ``METRICS_H3`` through ``get_value``. A district that fails (navigation
    error, timeout, ...) is reported and skipped without aborting the run.

    Side effects:
        * creates the ``data/`` directory if it does not exist;
        * writes ``data/karnataka_complete_scorecard_partial.csv`` after every
          fifth district and again when the loop ends or is interrupted;
        * writes ``data/karnataka_complete_scorecard.csv`` once the loop has
          completed with at least one row;
        * prints progress to stdout.
    """
    import os  # local import: keeps the fix self-contained in this block

    partial_path = "data/karnataka_complete_scorecard_partial.csv"
    final_path = "data/karnataka_complete_scorecard.csv"

    # to_csv does not create missing parent directories.
    os.makedirs("data", exist_ok=True)

    driver = get_driver()
    all_data = []

    try:
        for idx, dist in enumerate(DISTRICTS):
            print(f"[{idx+1}/{len(DISTRICTS)}] Scraping: {dist}...", flush=True)
            url = f"https://youthpower.in/scorecard?state=karnataka&district={dist}"

            try:
                # Navigation is inside the try so that a single page-load
                # failure only skips this district.
                driver.get(url)

                # Wait for main content
                WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), 'Y-Power Score')]")))
                time.sleep(5) # Allow data to settle

                row = {"District": dist.replace("-", " ").title()}

                # Record all visible data
                for label in SCORES + METRICS_H3:
                    row[label] = get_value(driver, label)

                all_data.append(row)
                print(f"  Done. Score: {row['Y-Power Score']}")

            except Exception as e:
                print(f"  Failed to scrape {dist}: {e}")

            # Periodic checkpoint; skipped while empty so that a run of early
            # failures cannot overwrite a useful checkpoint from a previous run.
            if (idx + 1) % 5 == 0 and all_data:
                pd.DataFrame(all_data).to_csv(partial_path, index=False)

    finally:
        try:
            # Also checkpoint on the way out, so rows gathered since the last
            # multiple of five survive an interruption (e.g. Ctrl-C).
            if all_data:
                pd.DataFrame(all_data).to_csv(partial_path, index=False)
        finally:
            driver.quit()

    if all_data:
        df = pd.DataFrame(all_data)
        df.to_csv(final_path, index=False)
        print("\nSUCCESS: Data saved to karnataka_complete_scorecard.csv")
    else:
        print("\nERROR: No data was captured.")

# Entry point: run the full scrape when this code is executed directly
# (the captured output below this cell shows it running in that mode).
if __name__ == "__main__":
    scrape_all()


[1/31] Scraping: bagalkot...
  Done. Score: 49
[2/31] Scraping: ballari...
  Done. Score: 51
[3/31] Scraping: belagavi...
  Done. Score: 48
[4/31] Scraping: bengaluru-rural...
  Done. Score: 62
[5/31] Scraping: bengaluru-urban...
  Done. Score: 62
[6/31] Scraping: bidar...
  Done. Score: 37
[7/31] Scraping: chamarajanagar...
  Done. Score: 38
[8/31] Scraping: chikballapur...
  Done. Score: 42
[9/31] Scraping: chikkamagaluru...
  Done. Score: 47
[10/31] Scraping: chitradurga...
  Done. Score: 39
[11/31] Scraping: dakshina-kannada...
  Done. Score: 57
[12/31] Scraping: davangere...
  Done. Score: 46
[13/31] Scraping: dharwad...
  Done. Score: 57
[14/31] Scraping: gadag...
  Done. Score: 45
[15/31] Scraping: hassan...
  Done. Score: 52
[16/31] Scraping: haveri...
  Done. Score: 41
[17/31] Scraping: kalaburagi...
  Done. Score: 40
[18/31] Scraping: kodagu...
  Done. Score: 46
[19/31] Scraping: kolar...
  Done. Score: 47
[20/31] Scraping: koppal...
  Done. Score: 49
[21/31] Scraping: mandya

In [24]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.neighbors import KNeighborsRegressor
import json
import plotly.utils


# --- 1. DATA PREPARATION ---
# Load the per-district scorecard table; the path is the same file that
# scrape_all() writes as its final output.
df = pd.read_csv('data/karnataka_complete_scorecard.csv')

def vectorize_metric(val):
    """Convert a scraped metric value to a float.

    Commas and percent signs are stripped, then the first (optionally signed)
    number in the text is parsed. A magnitude letter directly after that
    number scales it: ``L`` (lakh) x 100,000, ``K`` x 1,000, ``M`` x 1,000,000.

    Compared with keeping every digit and dot found anywhere in the string,
    this preserves the sign of negative values, does not fuse separate numbers
    ("49/100" is 49, not 49100), and ignores stray dots such as the one in
    "Rs. 1,200".

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        The parsed float, or 0.0 for missing values ('N/A', '', NaN, None)
        and for text that contains no number.
    """
    import re  # local import: ``re`` is not among the notebook's imports

    if pd.isna(val) or val == 'N/A' or val == '':
        return 0.0
    text = str(val).upper().replace(',', '').replace('%', '').strip()
    match = re.search(r'([-+]?(?:\d+\.?\d*|\.\d+))\s*([LKM]?)', text)
    if match is None:
        return 0.0
    # Handle Magnitude (Lakhs, Thousands, Millions)
    multipliers = {'L': 100000.0, 'K': 1000.0, 'M': 1000000.0, '': 1.0}
    return float(match.group(1)) * multipliers[match.group(2)]

# Score columns that form the feature space of the KNN model below.
knn_dims = ['Opportunity', 'Workforce', 'Education', 'Readiness and Skills']

# Parse every feature column from scraped text into floats.
for dim in knn_dims:
    df[dim] = df[dim].apply(vectorize_metric)

# --- 2. KNN CONVERGENCE ENGINE ---
# Benchmark level a district has to reach, and the assumed yearly growth
# applied to a district's mean score while simulating the catch-up.
GROWTH = 0.05
BENGALURU_EQ = 62

def calc_convergence(row):
    """Count the years of compound growth a district needs to reach the benchmark.

    The mean of the row's ``knn_dims`` columns is grown by ``GROWTH`` once per
    year until it is no longer below ``BENGALURU_EQ``.

    Args:
        row: One DataFrame row holding numeric values for every ``knn_dims`` column.

    Returns:
        Number of years as an int: 0 when the mean already meets the
        benchmark, and at most 50 (the simulation cap).
    """
    level = row[knn_dims].mean()
    years = 0
    # Stops at the benchmark or at the 50-year cap, whichever comes first.
    while years < 50 and level < BENGALURU_EQ:
        years += 1
        level *= (1 + GROWTH)
    return years

# Simulated years-to-benchmark for every district (row-wise).
df['Target_Years'] = df.apply(calc_convergence, axis=1)
# Smooth those targets with a 3-nearest-neighbour regressor over the score dimensions.
# NOTE(review): the model is fitted and evaluated on the same rows, so each
# district is one of its own neighbours; these are not out-of-sample predictions.
knn = KNeighborsRegressor(n_neighbors=3).fit(df[knn_dims], df['Target_Years'])
df['KNN_Projected_Years'] = knn.predict(df[knn_dims])

# --- 4. PRO KNN CHART (Geometric Mesh) ---
# Scatter of Opportunity (x) against Education (y), one marker per district,
# with marker size and colour driven by the KNN-projected years.
# The sorted copy is only used for the background line trace.
df_sorted = df.sort_values('Opportunity')
fig_knn = go.Figure()

# Background Mesh Line
# Joins the districts in ascending Opportunity order; decorative only
# (no hover text, no legend entry).
fig_knn.add_trace(go.Scatter(
    x=df_sorted['Opportunity'], y=df_sorted['Education'], mode='lines',
    line=dict(color='rgba(0, 255, 204, 0.2)', width=1), hoverinfo='none', showlegend=False
))

# GENERATE SPECIALIZED HOVER LABELS
# One HTML snippet per row, built in the original (unsorted) row order so it
# lines up with the marker trace below.
hover_labels = [
    f"<b>{row['District']}</b><br>" +
    f"Opportunity score: {row['Opportunity']:,.0f}<br>" +
    f"Education score: {row['Education']:,.0f}<br>" +
    f"<b>Years to reach Bengaluru Urban: {row['KNN_Projected_Years']:.1f} yrs</b>"
    for _, row in df.iterrows()
]

# Hexagonal Nodes
# Marker size shrinks as projected years grow: (50 - years) * 0.4. Because
# calc_convergence caps at 50 years, a district at the cap gets size 0.
# District names are drawn only where the projection is below 5 years.
fig_knn.add_trace(go.Scatter(
    x=df['Opportunity'], y=df['Education'], mode='markers+text',
    hovertext=hover_labels, # The integrated convergence metric
    hoverinfo='text',
    marker=dict(
        size=(50 - df['KNN_Projected_Years'])*0.4, color=df['KNN_Projected_Years'],
        colorscale='Viridis', reversescale=True, symbol='hexagon',
        line=dict(width=1, color='white'),
        colorbar=dict(
            title="<b>Projected<br>Years</b>",
            thickness=12,      # Thinner bar
            len=0.8,           # Bar length of 0.8 (presumably a fraction of the plot height, i.e. 80% - confirm lenmode)
            y=0.5,             # Centered vertically
            yanchor='middle',
            outlinewidth=0
        )
    ),
    text=[f"<b>{d}</b>" if y < 5 else "" for d, y in zip(df['District'], df['KNN_Projected_Years'])],
    textposition="top center", name='Districts'
))

# Layout: fixed 750x600 canvas, bold title with a subtitle line, and a footnote
# annotation placed below the plot area (paper coordinates, y=-0.2).
# NOTE(review): the title string ends with an opening <span ...> that is never
# closed and has no content - looks like a leftover; confirm before removing.
fig_knn.update_layout(
    height=600, # Increases the vertical footprint
    width=750,
    title=dict(text = ("<b>REGIONAL DYNAMICS: K-NEAREST NEIGHBOR (KNN) PREDICTIONS</b><br>"
            "<span style='font-size:13px; color:#2d3436; font-family:Arial;'> "
            "Estimating temporal distance (years) for districts to reach Bengaluru benchmark levels</span><br>"
            "<span style='font-size:11px; color:#7f8c8d; font-family:monospace;'> "))
    ,
    xaxis=dict(title="<b>OPPORTUNITY SCORE</b><br> ", gridcolor="#f5f5f5", showline=False, tickfont=dict(color="#555555")),
    yaxis=dict(title="<b>EDUCATION SCORE</b><br> ", gridcolor="#f5f5f5", showline=False, tickfont=dict(color="#555555")),
    template='plotly_white', margin=dict(t=120, l=120, r=120, b=120),
    annotations=[
        dict(
            x=0.5, y=-0.2,
            xref="paper", yref="paper",
            text="<i>Note: Model parameters assume a 5% average growth rate across districts.</i>",
            showarrow=False,
            font=dict(size=10, color="#95a5a6"),
            align="center"
        )
    ]
)

fig_knn.show()

In [25]:
import plotly.io as pio
# Create your plot (fig_knn) as usual...

# Export ONLY the necessary div and script (not a full page)
plot_knn = pio.to_html(fig_knn, full_html=False, include_plotlyjs='cdn')

# Write with an explicit UTF-8 encoding: the default for open() depends on the
# platform locale, so any non-ASCII character in the markup could otherwise
# fail to encode or be mis-decoded by the page embedding this fragment.
with open('data/knn_component.html', 'w', encoding='utf-8') as f:
    f.write(plot_knn)

In [82]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# 1. LOAD AND PRECISE CLEANING
# Re-read the scorecard CSV so this cell starts from the raw scraped strings.
df = pd.read_csv('data/karnataka_complete_scorecard.csv')

def clean_val(val):
    """Convert a scraped metric value to a number.

    Commas and percent signs are removed. A value containing ``L`` (lakh) is
    multiplied by 100,000 and one containing ``K`` by 1,000, after the letter
    itself is removed.

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        The parsed float, or 0 when the value is missing ('N/A', NaN, None)
        or cannot be parsed. Unparseable text that happens to contain an L or
        a K (e.g. "UNKNOWN") also yields 0 instead of raising ValueError.
    """
    if pd.isna(val) or val == 'N/A':
        return 0
    text = str(val).upper().replace(',', '').replace('%', '')
    factor = 1
    # 'L' is checked before 'K', so it wins when both letters are present.
    for suffix, scale in (('L', 100000), ('K', 1000)):
        if suffix in text:
            text, factor = text.replace(suffix, ''), scale
            break
    try:
        return float(text) * factor
    except ValueError:
        return 0

# Parse the headline score into numbers so it can be sorted and plotted.
df['Y-Power Score'] = df['Y-Power Score'].apply(clean_val)

# 2. ISOLATING THE TOP 10 MAGNITUDE NODES
# We sort descending for the top 10, then ascending for the chart y-axis ranking
# (a horizontal bar chart draws the first row at the bottom, so ascending order
# puts the best district at the top).
df_top10 = df.sort_values('Y-Power Score', ascending=False).head(10).copy()
df_top10 = df_top10.sort_values('Y-Power Score', ascending=True)

# 3. OPTION A: PROFESSIONAL HORIZONTAL BAR (High Clarity)
# The title given here is a placeholder; update_layout() below replaces it with
# the version that carries the source line.
fig_bar = px.bar(
    df_top10,
    x='Y-Power Score',
    y='District',
    orientation='h',
    text='Y-Power Score',
    color='Y-Power Score',
    color_continuous_scale='Blues',
    template='plotly_white',
    title='<b>TOP 10 DISTRICTS BY Y-POWER SCORE</b><br>'
)

fig_bar.update_layout(
    title=dict(text=("<b>TOP 10 DISTRICTS BY Y-POWER SCORE</b><br>"
            "<span style='font-size:13px; color:#95a5a6; font-family:Arial;'> "
            "<i>Source: <u>https://youthpower.in/scorecard?state=karnataka</u></i></span><br>"
            "<span style='font-size:10px; color:#7f8c8d; font-family:monospace;'> ")),
    # Fixed markup: the original string had a closing </b> without its opening
    # tag; the axis title is bold like the axis titles of the other charts.
    xaxis_title='<b>Youth-Power Score</b><br>',
    yaxis_title=None,
    font=dict(family="Arial, sans-serif", size=12, color="#2d3436"),
    margin=dict(l=100, r=50, t=80, b=20),
    coloraxis_showscale=False,
    height=600,
    width = 750,
    hovermode='y unified',
)

fig_bar.show()


In [83]:
# Export the bar chart as an embeddable fragment (div + script, plotly.js from CDN).
# `pio` is the plotly.io alias imported in the earlier export cell.
plot_y_power = pio.to_html(fig_bar, full_html=False, include_plotlyjs='cdn')
# Explicit UTF-8 so the written file does not depend on the platform's default encoding.
with open('data/y_power_bar_plot.html', 'w', encoding='utf-8') as f:
    f.write(plot_y_power)

In [85]:
import pandas as pd
import plotly.express as px

# 1. DATA VECTORIZATION
# Re-read the scorecard CSV so this cell starts from the raw scraped strings.
df = pd.read_csv('data/karnataka_complete_scorecard.csv')

def clean_val(val):
    """Convert a scraped metric value to a number.

    Commas and percent signs are removed. A value containing ``L`` (lakh) is
    multiplied by 100,000 and one containing ``K`` by 1,000, after the letter
    itself is removed.

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        The parsed float, or 0 when the value is missing ('N/A', NaN, None)
        or cannot be parsed. Unparseable text that happens to contain an L or
        a K (e.g. "UNKNOWN") also yields 0 instead of raising ValueError.
    """
    if pd.isna(val) or val == 'N/A':
        return 0
    text = str(val).upper().replace(',', '').replace('%', '')
    factor = 1
    # 'L' is checked before 'K', so it wins when both letters are present.
    for suffix, scale in (('L', 100000), ('K', 1000)):
        if suffix in text:
            text, factor = text.replace(suffix, ''), scale
            break
    try:
        return float(text) * factor
    except ValueError:
        return 0

# Columns used by this chart; each is parsed from scraped text into numbers.
cols_to_clean = ['MSMEs per 10k Population', 'Readiness and Skills', 'Y-Power Score']
for col in cols_to_clean:
    df[col] = df[col].apply(clean_val)

# 2. SELECTING TOP 10 STRATEGIC NODES
# The ten districts with the highest Y-Power Score.
df_top10 = df.sort_values('Y-Power Score', ascending=False).head(10).copy()

# 3. CONSTRUCTING THE CLEAN PLOTLY PROJECTION
# Bubble chart: MSME density (x) against the Readiness and Skills score (y);
# bubble size and colour both encode the Y-Power Score.
# The title given here is replaced by update_layout() below.
fig_industry = px.scatter(
    df_top10,
    x='MSMEs per 10k Population',
    y='Readiness and Skills',
    size='Y-Power Score',
    color='Y-Power Score',
    text='District',
    template='plotly_white', # Professional light theme
    color_continuous_scale='Blues',
    title='<b>INDUSTRIAL DENSITY VS SKILL READINESS: TOP 10 DISTRICTS</b>'
)

# 4. REFINING GEOMETRIC NEATNESS
fig_industry.update_traces(
    marker=dict(line=dict(width=1.5, color='DarkSlateGrey')), # Defined node boundaries
    textposition='top center', # Clean label placement
    opacity=0.85
)

# Final layout: title with a source line, bold axis titles, fixed 750x600 canvas.
# NOTE(review): the title string ends with an opening <span ...> that is never
# closed and has no content - looks like a leftover; confirm before removing.
fig_industry.update_layout(
    title = dict(text=("<b>INDUSTRIAL DENSITY VS SKILL READINESS: TOP 10 DISTRICTS</b><br>"
            "<span style='font-size:13px; color:#95a5a6; font-family:Arial;'> "
            "<i>Source: <u>https://youthpower.in/scorecard?state=karnataka</u></i></span><br>"
            "<span style='font-size:10px; color:#7f8c8d; font-family:monospace;'> ")),
    xaxis_title='<b>MSMEs per 10k Population</b>',
    yaxis_title='<b>Readiness & Skills</b>',
    font=dict(family="Arial, sans-serif", size=12, color="#2d3436"),
    margin=dict(l=50, r=50, t=100, b=50),
    coloraxis_colorbar=dict(title="<b>Y-Power</b>"),
    height=600,
    width = 750,
    title_font_size=20
)

fig_industry.show()

In [87]:
# Export the scatter as an embeddable fragment (div + script, plotly.js from CDN).
# `pio` is the plotly.io alias imported in the earlier export cell.
plot_industry_readiness = pio.to_html(fig_industry, full_html=False, include_plotlyjs='cdn')
# Explicit UTF-8 so the written file does not depend on the platform's default encoding.
with open('data/plot_industry_readiness.html', 'w', encoding='utf-8') as f:
    f.write(plot_industry_readiness)

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# 1. DATA VECTORIZATION & CLEANING
# Re-read the scorecard CSV so this cell starts from the raw scraped strings.
df = pd.read_csv('data/karnataka_complete_scorecard.csv')

def clean_val(val):
    """Convert a scraped metric value to a number.

    Commas and percent signs are removed. A value containing ``L`` (lakh) is
    multiplied by 100,000 and one containing ``K`` by 1,000, after the letter
    itself is removed.

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        The parsed float, or 0 when the value is missing ('N/A', NaN, None)
        or cannot be parsed. Unparseable text that happens to contain an L or
        a K (e.g. "UNKNOWN") also yields 0 instead of raising ValueError.
    """
    if pd.isna(val) or val == 'N/A':
        return 0
    text = str(val).upper().replace(',', '').replace('%', '')
    factor = 1
    # 'L' is checked before 'K', so it wins when both letters are present.
    for suffix, scale in (('L', 100000), ('K', 1000)):
        if suffix in text:
            text, factor = text.replace(suffix, ''), scale
            break
    try:
        return float(text) * factor
    except ValueError:
        return 0

# Columns parsed to numbers for this cell. Not every entry is plotted below,
# but all of them are cleaned.
metrics = [
    'Y-Power Score', 'Per Capita Income (Rs.)', 'Workforce', 'GER, Higher Education',
    'CSR Spending per Capita (Rs.)', 'Labor force participation (%)', 'Unemployment rate (%)',
    'ITI Seats per 1 lac Youth', 'Test scores (%)', 'Learning Outcomes (average)',
    'Total Youth Population', 'Registered Unorganised Workers', 'Education', 'Readiness and Skills'
]

# clean_val maps missing or unparseable cells to 0, so such districts appear as zeros in the charts.
for col in metrics:
    df[col] = df[col].apply(clean_val)

# Isolate Top 10 districts for high-clarity professional analysis
# (ranked by Y-Power Score, highest first).
df_top10 = df.sort_values('Y-Power Score', ascending=False).head(10).copy()

# 2. DESIGN ENGINE FOR GEOMETRIC STYLE
def apply_vector_style(fig, title, xaxis_label, yaxis_label, chart_type='scatter'):
    """Apply the shared house style to a figure, in place.

    Args:
        fig: Figure-like object exposing ``update_layout`` and ``update_traces``.
        title: Chart title; rendered bold and left-aligned.
        xaxis_label: X-axis title; rendered bold.
        yaxis_label: Y-axis title; rendered bold, or left empty when falsy.
        chart_type: ``'scatter'`` or ``'bar'``. Selects the left margin
            (wider for bars) and the trace styling; any other value leaves the
            traces untouched.

    Returns:
        None. The figure is modified through its own update methods.
    """
    grid = dict(showgrid=True, gridcolor="#F1F2F6", linecolor="#DCDDE1")
    y_title = f"<b>{yaxis_label}</b>" if yaxis_label else ""
    # Horizontal bars need room on the left for the category labels.
    left_margin = 150 if chart_type == 'bar' else 80

    fig.update_layout(
        title=dict(text=f"<b>{title}</b>", font=dict(size=20, color="#2D3436"), x=0),
        xaxis=dict(title=f"<b>{xaxis_label}</b>", **grid),
        yaxis=dict(title=y_title, **grid),
        template='plotly_white',
        font=dict(family="Arial, sans-serif", size=12, color="#2D3436"),
        margin=dict(l=left_margin, r=80, t=100, b=80),
        height=600,
        coloraxis_showscale=False
    )

    # (text position, marker opacity) for each supported chart type.
    trace_styles = {
        'scatter': ('top center', 0.85),
        'bar': ('outside', 0.9),
    }
    style = trace_styles.get(chart_type)
    if style is not None:
        text_position, marker_opacity = style
        fig.update_traces(
            textposition=text_position,
            marker=dict(line=dict(width=1, color='#2D3436'), opacity=marker_opacity),
        )

# --- PLOT SUITE (plots 5, 6, 7, 9 and 10 of the series) ---
# Every chart uses the top-10 districts and is styled through apply_vector_style().

# Plot 5: CSR Spending (Horizontal Bar)
# Sorted ascending so the largest value ends up at the top of the horizontal chart.
fig5 = px.bar(df_top10.sort_values('CSR Spending per Capita (Rs.)'), x='CSR Spending per Capita (Rs.)', 
             y='District', orientation='h', color='CSR Spending per Capita (Rs.)', color_continuous_scale='Tealgrn')
apply_vector_style(fig5, "CSR Investment per Capita", "Rs. per Capita", "", 'bar')
fig5.show()

# Plot 6: Labor Market Friction
# Participation (x) against unemployment (y); bubble size and colour encode the Workforce score.
fig6 = px.scatter(df_top10, x='Labor force participation (%)', y='Unemployment rate (%)', size='Workforce',
                 color='Workforce', text='District', color_continuous_scale='Reds')
apply_vector_style(fig6, "Labor Market Friction Map", "Participation %", "Unemployment %", 'scatter')
fig6.show()

# Plot 7: Vocational Training Density
# Sorted ascending so the largest value ends up at the top of the horizontal chart.
fig7 = px.bar(df_top10.sort_values('ITI Seats per 1 lac Youth'), x='ITI Seats per 1 lac Youth', y='District', 
             orientation='h', color='ITI Seats per 1 lac Youth', color_continuous_scale='GnBu')
apply_vector_style(fig7, "Vocational Training Density (ITI)", "Seats per 1 Lac Youth", "", 'bar')
fig7.show()


# Plot 9: Youth Vulnerability
# Youth population (x) against registered unorganised workers (y); size and colour encode the Y-Power Score.
fig9 = px.scatter(df_top10, x='Total Youth Population', y='Registered Unorganised Workers', size='Y-Power Score',
                 color='Y-Power Score', text='District', color_continuous_scale='OrRd')
apply_vector_style(fig9, "Youth Vulnerability Node Map", "Total Youth Pop", "Unorganised Workers", 'scatter')
fig9.show()

# Plot 10: Human Capital Transformation
# Education (x) against Readiness and Skills (y); size and colour encode the Y-Power Score.
fig10 = px.scatter(df_top10, x='Education', y='Readiness and Skills', size='Y-Power Score',
                  color='Y-Power Score', text='District', color_continuous_scale='Viridis')
apply_vector_style(fig10, "Human Capital Transformation", "Education Score", "Readiness & Skills", 'scatter')
# apply_vector_style hides the colour scale; turn it back on for this chart only.
fig10.update_layout(coloraxis_showscale=True)
fig10.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# 1. Vectorization / Cleaning
def vectorize_metric(val):
    """Convert a scraped metric value to a float.

    Commas and percent signs are stripped, then the first (optionally signed)
    number in the text is parsed. A magnitude letter directly after that
    number scales it: ``L`` (lakh) x 100,000, ``K`` x 1,000, ``M`` x 1,000,000.
    The ``M`` case brings this copy in line with the notebook's other
    ``vectorize_metric`` definition.

    Compared with keeping every digit and dot found anywhere in the string,
    this preserves the sign of negative values, does not fuse separate numbers
    ("49/100" is 49, not 49100), and ignores stray dots such as the one in
    "Rs. 1,200".

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        The parsed float, or 0.0 for missing values ('N/A', '', NaN, None)
        and for text that contains no number.
    """
    import re  # local import: ``re`` is not among the notebook's imports

    if pd.isna(val) or val == 'N/A' or val == '':
        return 0.0
    text = str(val).upper().replace(',', '').replace('%', '').strip()
    match = re.search(r'([-+]?(?:\d+\.?\d*|\.\d+))\s*([LKM]?)', text)
    if match is None:
        return 0.0
    multipliers = {'L': 100000.0, 'K': 1000.0, 'M': 1000000.0, '': 1.0}
    return float(match.group(1)) * multipliers[match.group(2)]

def normalize(series):
    """Min-max scale ``series`` onto the range [0, 1].

    Args:
        series: Numeric pandas Series.

    Returns:
        A Series of the same length holding ``(value - min) / (max - min)``.
        When every value is identical (zero range) the result is all zeros
        rather than a division by zero.
    """
    lowest = series.min()
    spread = series.max() - lowest
    if spread == 0:
        return series * 0.0
    return (series - lowest) / spread

# Load and Filter
df = pd.read_csv('data/karnataka_complete_scorecard.csv')
# Exclude Vijayanagara from this analysis (reason not recorded here - presumably
# incomplete data for the district; TODO confirm).
# .copy() makes the filtered frame independent of the one returned by read_csv,
# so the column assignments below cannot trigger SettingWithCopyWarning or
# write into a temporary object.
df = df[df['District'] != 'Vijayanagara'].copy()

# Prepare Vectors
# Parse the raw scraped strings of the four input columns into floats.
cols = ['Number of Colleges', 'Libraries', 'Enrollment Ratio', 'Test scores (%)']
for col in cols:
    df[col] = df[col].apply(vectorize_metric)

# Academic_Proxy: mean of three min-max normalised capacity indicators, scaled to 0-100.
df['Norm_Libraries'] = normalize(df['Libraries'])
df['Norm_Colleges'] = normalize(df['Number of Colleges'])
df['Norm_Enrollment'] = normalize(df['Enrollment Ratio'])
df['Academic_Proxy'] = df[['Norm_Libraries', 'Norm_Colleges', 'Norm_Enrollment']].mean(axis=1) * 100

# 2. Sklearn Regression Model
# Keep only districts with a positive test score and a positive proxy;
# vectorize_metric maps missing values to 0, so this drops rows without data.
reg_df = df[(df['Test scores (%)'] > 0) & (df['Academic_Proxy'] > 0)].copy()
X = reg_df[['Test scores (%)']]
y = reg_df['Academic_Proxy']

model = LinearRegression()
model.fit(X, y)

y_pred = model.predict(X)
r_sq = model.score(X, y)
# With a single predictor, |r| is sqrt(R^2) and its sign is the sign of the slope.
corr_coeff = np.sqrt(r_sq) if model.coef_[0] > 0 else -np.sqrt(r_sq)

# 3. Plotly Visualization (Geometric Vector Design Style)
# Using a clean, architectural vector aesthetic
# Scatter of test scores (x) against the academic proxy (y), one labelled point per district.
fig = px.scatter(reg_df, x='Test scores (%)', y='Academic_Proxy',
                 text='District',
                 hover_data=['Test scores (%)', 'Academic_Proxy'],
                 title="<b>Geometric Regression Analysis: Efficiency vs Capacity Vectors</b>")

# Add Regression Line Vector
# Fitted values from the linear model, drawn over the same x positions.
# The trace name carries r, but the legend is disabled in the layout below,
# so that value reaches the reader only through the annotation box.
fig.add_trace(go.Scatter(x=reg_df['Test scores (%)'], y=y_pred,
                         mode='lines',
                         name=f'Regression Vector (r={corr_coeff:.3f})',
                         line=dict(color='#FF5733', width=3, dash='solid')))

# Geometric Styling
# update_traces without a selector applies to every trace, including the regression line.
fig.update_traces(marker=dict(size=10, 
                              line=dict(width=1, color='DarkSlateGrey')),
                  textposition='top center')

fig.update_layout(
    template='plotly_white',
    xaxis_title="<b>Test Efficiency Node (%)</b>",
    yaxis_title="<b>Infrastructure Capacity Node</b>",
    font=dict(family="Courier New, monospace", size=12, color="#2c3e50"),
    margin=dict(l=40, r=40, t=80, b=40),
    plot_bgcolor='#f9f9f9',
    paper_bgcolor='white',
    showlegend=False
)

# Add Annotation for Metrics
# Boxed r / R-squared read-out in the top-left corner (paper coordinates).
fig.add_annotation(
    x=0.05, y=0.95,
    xref="paper", yref="paper",
    text=f"Correlation (r): {corr_coeff:.4f}<br>R-Squared: {r_sq:.4f}",
    showarrow=False,
    font=dict(size=12, color="#FF5733"),
    align="left",
    bgcolor="white",
    bordercolor="#FF5733",
    borderwidth=1
)

# Render the figure inline (nothing is written to disk in this cell).
fig.show()

# Echo the fit statistics at full precision.
print(f"Correlation Coefficient: {corr_coeff}")
print(f"R-squared: {r_sq}")

Correlation Coefficient: 0.1505008202642281
R-squared: 0.022650496900205486
