In [18]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, dash_table, Input, Output

In [2]:
# Load the preprocessed funding dataset; every later cell reads from `df`.
df = pd.read_csv(filepath_or_buffer='preprocessed_startups.csv')

In [3]:
# Convert 'Date' to datetimes; values that cannot be parsed become NaT
# because of errors='coerce' instead of raising.
df['Date'] = df['Date'].pipe(pd.to_datetime, errors='coerce')

# Derive the calendar year of each row's date into its own column.
df['Year'] = df['Date'].dt.year

In [4]:
# Year that ages are measured against. Kept as a named constant so the
# baseline is changed in exactly one place.
REFERENCE_YEAR = 2025

# Age in years relative to REFERENCE_YEAR, derived from the 'Year' column.
# NOTE(review): 'Year' is taken from each row's 'Date'; confirm that this is
# the date the age is meant to be measured from.
df['startup_age'] = REFERENCE_YEAR - df['Year']

# Collapse to one row per startup: summed funding and mean age over its rows.
startup_funding = (
    df.groupby('Startup Name')
    .agg({'Amount in INR(K)': 'sum', 'startup_age': 'mean'})
    .reset_index()
)

# Bubble chart: both the y-position and the bubble size encode total funding.
# The axis label states the unit of the plotted column (thousands of INR).
fig = px.scatter(
    startup_funding,
    x='startup_age',
    y='Amount in INR(K)',
    size='Amount in INR(K)',
    hover_name='Startup Name',
    size_max=60,
    title='Funding Amount vs Startup Age',
    labels={'startup_age': 'Startup Age (Years)', 'Amount in INR(K)': 'Total Funding (INR K)'}
)
fig.show()


In [5]:
# Sum funding per industry vertical and keep the ten largest totals.
sector_funding = (
    df.groupby('Industry Vertical')['Amount in INR(K)']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)

# Horizontal bar chart of those ten sectors. The x-axis label states the unit
# of the plotted column (thousands of INR).
fig = px.bar(
    sector_funding,
    x='Amount in INR(K)',
    y='Industry Vertical',
    orientation='h',
    title='Top 10 Most Funded Sectors',
    labels={'Amount in INR(K)': 'Total Funding (INR K)', 'Industry Vertical': 'Sector'},
    text='Amount in INR(K)'
)
# Order the category axis by bar total so the largest bar ends up at the top.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.show()


In [6]:
# Total funding per industry vertical, ordered from largest to smallest.
industry_funding = (
    df.groupby('Industry Vertical')['Amount in INR(K)']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

# Vertical bar chart restricted to the first ten (i.e. largest) industries.
fig = px.bar(
    industry_funding.iloc[:10],
    x='Industry Vertical',
    y='Amount in INR(K)',
    title='Top 10 Industries by Total Funding (INR K)',
    text='Amount in INR(K)',
)
fig.show()


In [7]:
# Histogram of the per-row funding amounts, using 50 bins.
fig = px.histogram(
    df,
    x='Amount in INR(K)',
    nbins=50,
    title='Distribution of Funding Amounts (INR K)',
)
# Replace the default axis titles with descriptive ones.
fig.update_layout(
    xaxis={'title': {'text': 'Funding Amount (in INR K)'}},
    yaxis={'title': {'text': 'Number of Startups'}},
)
fig.show()


In [8]:
# Summed funding for each city, with the city kept as a regular column.
city_funding = df.groupby('City Location', as_index=False)['Amount in INR(K)'].sum()

# Scatter of cities where marker size and colour both follow total funding.
fig = px.scatter(
    city_funding,
    x='City Location',
    y='Amount in INR(K)',
    size='Amount in INR(K)',
    color='Amount in INR(K)',
    hover_name='City Location',
    title='Funding by City',
)
fig.show()


In [9]:
# Box plot of funding amount per investment type; points='all' additionally
# draws every individual observation next to its box.
fig = px.box(
    df,
    x='Investment Type',
    y='Amount in INR(K)',
    color='Investment Type',
    title='Investment Type vs Funding Amount',
    points='all',
)
fig.show()


In [23]:
# -----------------------------
# Final Dash Dashboard: Startup Funding
# -----------------------------
# Initialize Dash app
app = Dash(__name__)

# Shared inline styles. Keys are camelCase, as Dash expects for `style` dicts.
KPI_CARD_STYLE = {'padding': '15px', 'border': '1px solid #ddd', 'width': '22%'}
FILTER_CELL_STYLE = {'width': '24%', 'display': 'inline-block', 'padding': '10px'}

# Column ids of the top-10 table. Each id must equal a key of the records the
# callback writes to 'top-startups-table'; a mismatched id renders as an
# empty column.
TABLE_COLUMNS = [
    'Startup Name',
    'City Location',
    'Industry Vertical',
    'Investment Type',
    'Amount in INR(K)',
    'No Of Investors',
]


def _dropdown_options(column):
    """Return ['All'] followed by the sorted distinct non-null values of df[column].

    Missing values are dropped first so that sorting never has to compare NaN
    with the column's real values.
    """
    return ['All'] + sorted(df[column].dropna().unique().tolist())


def _kpi_card(title, value):
    """Build one bordered KPI tile: a small heading above a headline value."""
    return html.Div([html.H4(title), html.H2(value)], className='kpi', style=KPI_CARD_STYLE)


def _filter_dropdown(label, options, component_id):
    """Build one labelled filter dropdown that starts on the 'All' option."""
    return html.Div(
        [html.Label(label), dcc.Dropdown(options, 'All', id=component_id)],
        style=FILTER_CELL_STYLE,
    )


# Filter options
city_options = _dropdown_options('City Location')
industry_options = _dropdown_options('Industry Vertical')
investment_options = _dropdown_options('Investment Type')
tier_options = _dropdown_options('City Tier')

# KPI values are computed once, from the unfiltered frame, when the layout is built.
funding_amounts = df['Amount in INR(K)']

# Layout
app.layout = html.Div([
    html.H1("Startup Funding Dashboard", style={'textAlign': 'center', 'marginBottom': '20px'}),

    # KPI Cards
    html.Div([
        # NOTE(review): this is the row count of df; if a startup can appear in
        # several rows it overstates the number of startups -- confirm.
        _kpi_card("Total Startups", f"{df.shape[0]}"),
        _kpi_card("Total Funding (INR K)", f"{int(funding_amounts.sum()):,}"),
        _kpi_card("Average Funding (INR K)", f"{int(funding_amounts.mean()):,}"),
        _kpi_card("Max Funding (INR K)", f"{int(funding_amounts.max()):,}"),
    ], style={'display': 'flex', 'justifyContent': 'space-around', 'marginBottom': '30px'}),

    # Filters
    html.Div([
        _filter_dropdown("City", city_options, 'filter-city'),
        _filter_dropdown("Industry", industry_options, 'filter-industry'),
        _filter_dropdown("Investment Type", investment_options, 'filter-investment'),
        _filter_dropdown("City Tier", tier_options, 'filter-tier'),
    ]),

    html.Div([
        html.Label("Funding Scale"),
        dcc.Dropdown([
            {'label': 'Linear', 'value': 'linear'},
            {'label': 'Logarithmic', 'value': 'log'}
        ], 'linear', id='funding-scale', style={'width': '30%'})
    ], style={'padding': '10px'}),

    # Charts
    html.Div([
        dcc.Graph(id='funding-by-industry'),
        dcc.Graph(id='funding-distribution'),
        dcc.Graph(id='funding-vs-investors')
    ]),

    # Top 10 Startups Table
    html.H3("Top 10 Startups by Funding", style={'textAlign': 'center', 'marginTop': '30px'}),
    dash_table.DataTable(
        id='top-startups-table',
        columns=[{"name": column, "id": column} for column in TABLE_COLUMNS],
        page_size=10,
        sort_action='native',
        style_table={'overflowX': 'auto'},
        style_cell={'textAlign': 'left', 'padding': '5px'}
    )
])

# Callback
@app.callback(
    Output('funding-by-industry', 'figure'),
    Output('funding-distribution', 'figure'),
    Output('funding-vs-investors', 'figure'),
    Output('top-startups-table', 'data'),
    Input('filter-city', 'value'),
    Input('filter-industry', 'value'),
    Input('filter-investment', 'value'),
    Input('filter-tier', 'value'),
    Input('funding-scale', 'value')
)
def update_dashboard(city, industry, investment, tier, scale):
    """Rebuild the three charts and the top-10 table for the current filters.

    Args:
        city, industry, investment, tier: Selected dropdown values. 'All' or
            None (a cleared dropdown) means "do not filter on this column".
        scale: Axis type for the funding axes, 'linear' or 'log'. None
            (a cleared dropdown) falls back to 'linear'.

    Returns:
        A tuple ``(fig_industry, fig_dist, fig_scatter, table_data)`` in the
        order of the callback outputs; ``table_data`` is a list of row dicts.
    """
    selections = {
        'City Location': city,
        'Industry Vertical': industry,
        'Investment Type': investment,
        'City Tier': tier,
    }

    # Boolean indexing returns a new frame and nothing below mutates its
    # input, so the global df can be narrowed directly without copying it.
    df_filtered = df
    for column, selected in selections.items():
        # A cleared dropdown reports None; treat it like 'All' rather than
        # comparing the column against None, which would match no rows.
        if selected is None or selected == 'All':
            continue
        df_filtered = df_filtered[df_filtered[column] == selected]

    axis_type = scale or 'linear'

    # Bar chart: Total funding by industry
    funding_industry = df_filtered.groupby('Industry Vertical')['Amount in INR(K)'].sum().reset_index()
    fig_industry = px.bar(funding_industry, x='Industry Vertical', y='Amount in INR(K)',
                          text='Amount in INR(K)', color='Amount in INR(K)', color_continuous_scale='Viridis',
                          title='Total Funding by Industry')
    fig_industry.update_yaxes(type=axis_type)
    fig_industry.update_traces(hovertemplate='Industry: %{x}<br>Total Funding: %{y:,} K')

    # Histogram: Funding distribution
    fig_dist = px.histogram(df_filtered, x='Amount in INR(K)', nbins=50, title='Funding Distribution (INR K)',
                            color_discrete_sequence=['#f3722c'])
    fig_dist.update_xaxes(type=axis_type)
    fig_dist.update_traces(hovertemplate='Funding: %{x:,} K<br>Count: %{y}')

    # Scatter: Funding vs Number of Investors
    fig_scatter = px.scatter(df_filtered, x='No Of Investors', y='Amount in INR(K)', color='Industry Vertical',
                             size='Amount in INR(K)', hover_data=['Startup Name', 'City Location'],
                             title='Funding vs Number of Investors')
    fig_scatter.update_yaxes(type=axis_type)

    # Top 10 rows by funding amount, reduced to the columns the table shows
    top_10 = df_filtered.sort_values(by='Amount in INR(K)', ascending=False).head(10)
    table_data = top_10[['Startup Name','City Location','Industry Vertical','Investment Type','Amount in INR(K)','No Of Investors']].to_dict('records')

    return fig_industry, fig_dist, fig_scatter, table_data

# Start the Dash server in debug mode, but only when this code is executed
# as the main module rather than imported.
if __name__ == '__main__':
    app.run(
        debug=True,
    )


<IPython.core.display.Javascript object>