In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Read the wildcard team list from the CSV next to the notebook and preview it.
wildcard_csv_path = 'WILDCARD.csv'
orignal_data = pd.read_csv(wildcard_csv_path)
orignal_data.head()

Unnamed: 0,PS ID,Organization,Idea ID,Team ID,Team Name,Team Leader Name,AISHE Code,Name of the Institute,City,State,Status
0,SIH1527,"AICTE, MIC-Student Innovation",139,298,Mecha minds,MD AMAAN,C-16658,Bhilai Institute of Technology Durg,Chhattisgarh,Durg,WILDCARD
1,SIH1628,Government of Punjab,456,2315,Espresso,Mudhassir Habeeb K M,C-16611,KCG College of Technology,Tamil Nadu,CHENGALPATTU,WILDCARD
2,SIH1555,Ministry of AYUSH,613,2222,Infinite Reality,Abishek J A,C-16611,KCG College of Technology,Tamil Nadu,CHENGALPATTU,WILDCARD
3,SIH1605,Bharat Electronics Limited (BEL),1660,1813,DefendHer,Abel Biju George,C-11842,"Amal Jyothi College of Engineering, Kanjirappa...",Kerala,Kottayam,WILDCARD
4,SIH1554,Ministry of Agriculture and Farmers Welfare,2159,4598,FarmCS,Abhishek Maurya,C-36022,"Lakshmi Narain College of Technology, Kalchuri...",Madhya Pradesh,Bhopal,WILDCARD


In [3]:
# Print the column names, non-null counts and dtypes of the loaded frame.
orignal_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65 entries, 0 to 64
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   PS ID                  65 non-null     object
 1   Organization           65 non-null     object
 2   Idea ID                65 non-null     int64 
 3   Team ID                65 non-null     int64 
 4   Team Name              65 non-null     object
 5   Team Leader Name       65 non-null     object
 6   AISHE Code             65 non-null     object
 7   Name of the Institute  65 non-null     object
 8   City                   65 non-null     object
 9   State                  65 non-null     object
 10  Status                 65 non-null     object
dtypes: int64(2), object(9)
memory usage: 5.7+ KB


In [4]:
# Remove the columns that are not used in the analysis below.
# `columns=` already selects the column axis, so no `axis` argument is needed.
# errors='ignore' keeps this cell safe to re-run: because the result is assigned
# back to `orignal_data`, a second run would otherwise raise KeyError for
# columns that are already gone.
orignal_data = orignal_data.drop(
    columns=['Idea ID', 'Name of the Institute', 'Team Leader Name', 'Team ID'],
    errors='ignore',
)
orignal_data.head()

Unnamed: 0,PS ID,Organization,Team Name,AISHE Code,City,State,Status
0,SIH1527,"AICTE, MIC-Student Innovation",Mecha minds,C-16658,Chhattisgarh,Durg,WILDCARD
1,SIH1628,Government of Punjab,Espresso,C-16611,Tamil Nadu,CHENGALPATTU,WILDCARD
2,SIH1555,Ministry of AYUSH,Infinite Reality,C-16611,Tamil Nadu,CHENGALPATTU,WILDCARD
3,SIH1605,Bharat Electronics Limited (BEL),DefendHer,C-11842,Kerala,Kottayam,WILDCARD
4,SIH1554,Ministry of Agriculture and Farmers Welfare,FarmCS,C-36022,Madhya Pradesh,Bhopal,WILDCARD


In [5]:
# Correct the mislabelled City/State columns by swapping their names.
# rename() looks up every existing label in the mapping at once, so the two
# names can be exchanged directly without a temporary placeholder name.
# NOTE: the result is assigned back to `orignal_data`, so running this cell a
# second time swaps the names back again - run it exactly once per kernel.
orignal_data = orignal_data.rename(columns={'City': 'State', 'State': 'City'})
orignal_data.head()


Unnamed: 0,PS ID,Organization,Team Name,AISHE Code,State,City,Status
0,SIH1527,"AICTE, MIC-Student Innovation",Mecha minds,C-16658,Chhattisgarh,Durg,WILDCARD
1,SIH1628,Government of Punjab,Espresso,C-16611,Tamil Nadu,CHENGALPATTU,WILDCARD
2,SIH1555,Ministry of AYUSH,Infinite Reality,C-16611,Tamil Nadu,CHENGALPATTU,WILDCARD
3,SIH1605,Bharat Electronics Limited (BEL),DefendHer,C-11842,Kerala,Kottayam,WILDCARD
4,SIH1554,Ministry of Agriculture and Farmers Welfare,FarmCS,C-36022,Madhya Pradesh,Bhopal,WILDCARD


The City and State columns contained each other's values, so we have swapped the names of the two columns.

In [6]:
# Count the missing values in each column.
missing_per_column = orignal_data.isnull().sum()
missing_per_column

PS ID           0
Organization    0
Team Name       0
AISHE Code      0
State           0
City            0
Status          0
dtype: int64

In [7]:
# Summary statistics for every column. Left as the cell's last expression
# (instead of print) so the notebook renders it as a single table rather than
# as wrapped plain text.
orignal_data.describe(include='all')

          PS ID                   Organization    Team Name AISHE Code  \
count        65                             65           65         65   
unique       51                             25           65         37   
top     SIH1527  AICTE, MIC-Student Innovation  Mecha minds    C-16658   
freq          2                             17            1          2   

             State             City    Status  
count           65               65        65  
unique          14               28         1  
top     Tamil Nadu  Bengaluru Urban  WILDCARD  
freq            12                6        65  


In [8]:
# Number of selected teams per state, ordered from fewest to most.
# reset_index(drop=True) renumbers the rows after sorting and discards the old
# positions, so `df1` itself (not just the displayed copy) has no leftover
# "index" column.
df1 = (
    orignal_data.groupby("State")
    .size()
    .reset_index(name="No. Of Selected Teams")
    .sort_values(by="No. Of Selected Teams")
    .reset_index(drop=True)
)
df1

Unnamed: 0,State,No. Of Selected Teams
0,Haryana,1
1,Jammu and Kashmir,1
2,Rajasthan,1
3,Delhi,2
4,Kerala,2
5,Andhra Pradesh,2
6,Telangana,2
7,Punjab,4
8,Madhya Pradesh,5
9,Uttar Pradesh,7


In [9]:
# Pie chart with one slice per state, sized by the number of rows (teams)
# carrying that state; each slice is labelled with its share and state name.
pie_states = px.pie(
    data_frame=orignal_data,
    names='State',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title="DISTRIBUTION OF STATES(on the basis of teams selected)",
).update_traces(textinfo='percent+label')
pie_states.show()

In [10]:
# Count the selected teams for every (State, City) pair.
city_counts = orignal_data.groupby(['State', 'City']).size().reset_index(name='Count')

# Keep only the five states with the most selected teams overall.
top_states = city_counts.groupby('State')['Count'].sum().nlargest(5).index
filtered_data = city_counts[city_counts['State'].isin(top_states)]

# For each of those states keep its three highest-count cities.
# This uses sort + groupby().head() rather than groupby().apply(nlargest):
# apply() operating on the grouping column is deprecated in recent pandas, and
# once that column is excluded, reset_index(drop=True) would discard 'State'
# and break color='State' in the chart below.
top_cities_per_state = (
    filtered_data
    .sort_values('Count', ascending=False, kind='mergesort')  # stable: ties keep their current order
    .groupby('State')
    .head(3)
    .sort_values('State', kind='mergesort')  # stable: rows regroup by state, Count order preserved
    .reset_index(drop=True)
)

# One bar per city, coloured by state and annotated with its team count.
fig = px.bar(
    top_cities_per_state,
    x='City',
    y='Count',
    color='State',
    text='Count',
    title='Top 3 Cities for 5 States',
    labels={'Count': 'Number of Teams selected', 'City': 'Name of Cities'},
    hover_data=['State'])
fig.update_traces(
    marker_line_width=1.5,
    marker_line_color='black',
    textposition='outside')
fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title_x=0.5)
fig.show()





In [11]:
# Tally the selected teams per organization (most frequent first) and lay the
# tally out as a two-column frame for plotting.
org_tally = orignal_data['Organization'].value_counts()
org_counts = pd.DataFrame({
    'Organization': org_tally.index,
    'Count': org_tally.values,
})

# Pie chart of each organization's share of the selected teams, with the
# percentage and name written inside the slices.
fig = px.pie(
    data_frame=org_counts,
    values='Count',
    names='Organization',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title="DISTRIBUTION OF ORGANIZATION(on the basis of teams selected)",
)
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()
