In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import json
from sklearn.decomposition import PCA

# Load dataset
file_path = '/Users/meghapatel/Downloads/dc.csv'  # Update with your file path
data = pd.read_csv(file_path)

# Strip stray whitespace from the state names *before* anything reads them,
# so the names printed below are exactly the values used for matching later.
data['State/ UT'] = data['State/ UT'].str.strip()

# Load GeoJSON for India
geojson_path = '/Users/meghapatel/Downloads/india_states.geojson'  # Adjust path as needed
# JSON text is UTF-8; do not depend on the platform's default encoding.
with open(geojson_path, 'r', encoding='utf-8') as file:
    geojson_data = json.load(file)

# Print state names in data
print("Data States:", data['State/ UT'].unique())

# Print state names in GeoJSON (features without an 'st_nm' property are skipped)
geojson_states = [
    name.strip()
    for name in (
        feature['properties'].get('st_nm', None)
        for feature in geojson_data['features']
    )
    if name
]
print("GeoJSON States:", geojson_states)

# Pairwise correlations between the numeric columns of the dataset
correlation_matrix = data.corr(numeric_only=True)

# Render the matrix as an annotated heatmap (each cell shows its value)
heatmap_options = {
    'text_auto': True,
    'aspect': 'auto',
    'color_continuous_scale': 'RdBu',
    'title': 'Correlation Matrix of PHC Attributes',
    'labels': {'color': 'Correlation'},
}
corr_fig = px.imshow(correlation_matrix, **heatmap_options)

# Fixed square canvas with a centred title
corr_fig.update_layout(
    title_x=0.5,
    width=900,
    height=900,
    margin={'l': 50, 'r': 50, 't': 50, 'b': 50},
)

# Perform PCA
# Every numeric column (any integer/float width) except the state label.
features = [
    col for col in data.select_dtypes(include='number').columns
    if col != 'State/ UT'
]

# Keep only rows with a value for every feature, and remember which rows those
# are so the state labels can be taken from exactly the same rows afterwards.
complete_rows = data[features].dropna()
x = complete_rows.to_numpy(dtype=float)

# Standardize the features. A constant column has zero spread; dividing by 1
# instead leaves it at 0 after centring rather than producing NaN (0/0).
feature_std = x.std(axis=0)
feature_std[feature_std == 0] = 1.0
x = (x - x.mean(axis=0)) / feature_std

pca = PCA(n_components=2)  # Reduce to 2 components for visualization
principal_components = pca.fit_transform(x)
principal_df = pd.DataFrame(
    data=principal_components,
    columns=['Principal Component 1', 'Principal Component 2']
)

# Labels come from the rows that survived the feature dropna above; filtering
# the label column separately would shift labels onto the wrong points
# whenever a feature value is missing.
state_labels = data.loc[complete_rows.index, ['State/ UT']].reset_index(drop=True)
final_df = pd.concat([principal_df, state_labels], axis=1)

# Create a scatter plot for PCA results
pca_fig = px.scatter(
    final_df, x='Principal Component 1', y='Principal Component 2', color='State/ UT',
    title='PCA of PHC Attributes',
    labels={'Principal Component 1': 'Principal Component 1', 'Principal Component 2': 'Principal Component 2'}
)
pca_fig.update_layout(title_x=0.5)

# Create Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Values shared by several controls, built once instead of once per component.
state_names = data['State/ UT'].unique()
attribute_options = [
    {'label': col, 'value': col} for col in data.columns if col != 'State/ UT'
]
# Dash style dictionaries use camelCased CSS property names.
control_style = {'marginBottom': '20px'}

# App layout: a page title followed by one tab per analysis view.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col(html.H1("PHC Infrastructure and Functionality Analysis", className="text-center"), className="mb-4 mt-4")
    ]),
    dbc.Tabs([
        # Per-state summary table, filled in by the "stats-table" callback
        dbc.Tab(label="Descriptive Statistics", tab_id="tab-stats", children=[
            html.Div([
                dcc.Dropdown(
                    id="state-dropdown",
                    options=[{'label': state, 'value': state} for state in state_names],
                    value=state_names[0],
                    clearable=False,
                    style=control_style
                ),
                html.Div(id="stats-table"),
            ])
        ]),
        # Static heatmap computed at start-up
        dbc.Tab(label="Correlation Analysis", tab_id="tab-correlation", children=[
            html.Div([
                dcc.Graph(figure=corr_fig),
            ], className="mb-4")
        ]),
        # Grouped bar chart of the ticked attributes
        dbc.Tab(label="Comparative Analysis", tab_id="tab-comparison", children=[
            html.Div([
                dcc.Checklist(
                    id="attribute-checklist",
                    options=attribute_options,
                    value=[data.columns[2]],
                    inline=True,
                    style=control_style
                ),
                dcc.Graph(id="comparison-bar-chart"),
            ])
        ]),
        # Attribute-vs-attribute scatter plot. The axis dropdowns are not
        # clearable (like the state dropdown) so the callback always receives
        # a column name rather than None.
        dbc.Tab(label="Scatter Plot Analysis", tab_id="tab-scatter", children=[
            html.Div([
                dcc.Dropdown(
                    id="x-axis-dropdown",
                    options=attribute_options,
                    value=data.columns[2],
                    clearable=False,
                    style=control_style
                ),
                dcc.Dropdown(
                    id="y-axis-dropdown",
                    options=attribute_options,
                    value=data.columns[3],
                    clearable=False,
                    style=control_style
                ),
                dcc.Graph(id="scatter-plot"),
            ])
        ]),
        # Choropleth of one attribute across the states
        dbc.Tab(label="Geographical Map of India", tab_id="tab-map", children=[
            html.Div([
                dcc.Dropdown(
                    id="map-attribute-dropdown",
                    options=attribute_options,
                    value='Number of PHCs Functioning',  # Update default value if needed
                    clearable=False,
                    style=control_style
                ),
                dcc.Graph(id="india-map")
            ])
        ]),
        # Static PCA scatter plot with a short explanation underneath
        dbc.Tab(label="PCA Analysis", tab_id="tab-pca", children=[
            html.Div([
                dcc.Graph(figure=pca_fig),
                html.Div([
                    dcc.Markdown('''
                        ### Principal Component Analysis (PCA)
                        PCA is a technique used to emphasize variation and bring out strong patterns in a dataset. 
                        It helps in reducing the dimensionality of the data while preserving most of the variability.
                        
                        - *Principal Component 1:* The direction of the greatest variance in the data.
                        - *Principal Component 2:* The direction orthogonal to the first component that captures the second highest variance.
                        
                        By plotting the first two principal components, we can visualize the structure and identify clusters or patterns in the data.
                    ''')
                ])
            ])
        ])
    ])
], fluid=True)

# Callbacks for interactive components
@app.callback(
    Output("stats-table", "children"),
    Input("state-dropdown", "value")
)
def update_stats_table(selected_state):
    """Build the descriptive-statistics table for one state.

    Args:
        selected_state: Value chosen in the state dropdown; rows of ``data``
            whose 'State/ UT' equals it are summarised.

    Returns:
        An ``html.Table`` with one row per described column and one column
        per statistic produced by ``DataFrame.describe()``.
    """
    state_data = data[data['State/ UT'] == selected_state]
    # Transposed so each described column becomes a table row.
    desc_stats = state_data.describe().transpose()

    # Body rows start with the attribute name, so the header needs a matching
    # leading cell; without it every statistic heading sits one column left
    # of its values.
    header_cells = [html.Th("Attribute")]
    header_cells.extend(html.Th(stat) for stat in desc_stats.columns)

    body_rows = [
        html.Tr([html.Td(attribute)] + [html.Td(value) for value in stats])
        for attribute, stats in desc_stats.iterrows()
    ]

    return html.Table([
        html.Thead(html.Tr(header_cells)),
        html.Tbody(body_rows)
    ], className="table table-striped")

@app.callback(
    Output("comparison-bar-chart", "figure"),
    Input("attribute-checklist", "value")
)
def update_comparison_bar_chart(selected_attributes):
    """Draw a grouped bar chart of the ticked attributes, one group per state.

    Args:
        selected_attributes: Column names ticked in the checklist. When the
            selection is empty, the third column of ``data`` is used instead.

    Returns:
        The Plotly figure for the comparison graph.
    """
    # NOTE(review): the title and y-axis label say "Average", but the rows of
    # ``data`` are plotted as-is; no aggregation happens in this function.
    attributes = selected_attributes or [data.columns[2]]

    chart = px.bar(
        data,
        x='State/ UT',
        y=attributes,
        barmode='group',
        title='Average PHC Attributes by State/UT',
    )
    axis_titles = {'xaxis_title': 'State/UT', 'yaxis_title': 'Average Values'}
    chart.update_layout(title_x=0.5, **axis_titles)
    return chart

@app.callback(
    Output("scatter-plot", "figure"),
    [Input("x-axis-dropdown", "value"), Input("y-axis-dropdown", "value")]
)
def update_scatter_plot(x_attr, y_attr):
    """Plot one attribute against another, coloured by state.

    Args:
        x_attr: Column chosen in the x-axis dropdown.
        y_attr: Column chosen in the y-axis dropdown.

    Returns:
        The Plotly scatter figure, titled "<x_attr> vs <y_attr>".
    """
    chart_title = f'{x_attr} vs {y_attr}'
    scatter = px.scatter(data, x=x_attr, y=y_attr, color='State/ UT', title=chart_title)

    # Axis titles repeat the selected column names; the title is centred.
    axis_titles = {'xaxis_title': x_attr, 'yaxis_title': y_attr}
    scatter.update_layout(title_x=0.5, **axis_titles)
    return scatter

# Dataset state names that are spelled differently in the GeoJSON 'st_nm'
# property, with candidate GeoJSON spellings in order of preference.
# 'A & N Island' and 'Dadra & Nagar Haveli' have no exact counterpart in the
# GeoJSON names printed at start-up.
# NOTE(review): the 'Delhi' candidate is a guess at a common spelling; confirm
# against the GeoJSON file. A candidate is used only if the GeoJSON has it.
_STATE_NAME_ALIASES = {
    'A & N Island': ('Andaman and Nicobar',),
    'Dadra & Nagar Haveli': (
        'Dadra and Nagar Haveli',
        'Dadra and Nagar Haveli and Daman and Diu',
    ),
    'Delhi': ('NCT of Delhi',),
}


def _resolve_geojson_names(state_names):
    """Return {dataset name: GeoJSON name} for names needing translation.

    A name is translated only when it is absent from the GeoJSON and one of
    its alias candidates is present, so names that already match are never
    touched.
    """
    available = {
        feature['properties'].get('st_nm') for feature in geojson_data['features']
    }
    resolved = {}
    for name in state_names:
        if name in available:
            continue
        for candidate in _STATE_NAME_ALIASES.get(name, ()):
            if candidate in available:
                resolved[name] = candidate
                break
    return resolved


@app.callback(
    Output("india-map", "figure"),
    Input("map-attribute-dropdown", "value")
)
def update_india_map(selected_attribute):
    """Draw a choropleth of one attribute across the Indian states.

    Args:
        selected_attribute: Column of ``data`` used to colour the regions.

    Returns:
        The Plotly choropleth figure, zoomed to the plotted regions.
    """
    # Regions are matched to rows by exact string equality with 'st_nm', so
    # rows whose name is spelled differently would otherwise stay uncoloured.
    name_fixes = _resolve_geojson_names(data['State/ UT'].dropna().unique())
    map_df = data.assign(
        _geo_name=data['State/ UT'].map(lambda name: name_fixes.get(name, name))
    )

    fig = px.choropleth(
        data_frame=map_df,
        geojson=geojson_data,
        locations='_geo_name',
        featureidkey='properties.st_nm',
        color=selected_attribute,
        # Show the dataset's own state name in the hover box and hide the
        # internal matching column.
        hover_name='State/ UT',
        hover_data={'_geo_name': False},
        color_continuous_scale='Viridis',
        scope='asia',
        title=f'{selected_attribute} Distribution Across India'
    )
    # Zoom to the plotted regions and hide the base map.
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
    return fig

# Run app
if __name__ == '__main__':
    # Prefer ``app.run``: ``run_server`` is deprecated and no longer available
    # in Dash 3. Fall back to it only on older releases that lack ``run``.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True, port=8051)


Data States: ['Andhra Pradesh' 'Arunachal Pradesh' 'Assam' 'Bihar' 'Chhattisgarh' 'Goa'
 'Gujarat' 'Haryana' 'Himachal Pradesh' 'Jammu and Kashmir' 'Jharkhand'
 'Karnataka' 'Kerala' 'Madhya Pradesh' 'Maharashtra' 'Manipur' 'Meghalaya'
 'Mizoram' 'Nagaland' 'Odisha' 'Punjab' 'Rajasthan' 'Sikkim' 'Tamil Nadu'
 'Telangana' 'Tripura' 'Uttarakhand' 'Uttar Pradesh' 'West Bengal'
 'A & N Island' 'Chandigarh' 'Dadra & Nagar Haveli' 'Daman & Diu' 'Delhi'
 'Lakshadweep' 'Puducherry']
GeoJSON States: ['Telangana', 'Andaman and Nicobar', 'Andhra Pradesh', 'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Daman & Diu', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu and Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Lakshadweep', 'Madhya Pradesh', 'Maharashtra', 'Manipur', 'Chandigarh', 'Puducherry', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Tripura', 'Uttar Pradesh', 'Uttarakhand', 'West Bengal', 'Odisha', 'Dadra and Nagar Haveli and Daman and Diu', 'Meghalaya', 'Mizoram', 'Nag