In [57]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from joblib import Parallel, delayed
import gc

import ipywidgets as widgets
from IPython.display import display, display_markdown
pd.options.display.max_columns = None
pd.options.display.max_rows = None
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

import warnings
warnings.filterwarnings('ignore')

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

In [11]:
# Load the crimes-against-women dataset into `df` from a hard-coded path.
# NOTE(review): the import cell lists files under /kaggle/input while this
# reads from /content — confirm the path matches the runtime environment.
df = pd.read_csv("/content/CrimesOnWomenData.csv")

In [13]:
# Render the loaded DataFrame as this cell's output.
df

Unnamed: 0.1,Unnamed: 0,State,Year,Rape,K&A,DD,AoW,AoM,DV,WT
0,0,ANDHRA PRADESH,2001,871,765,420,3544,2271,5791,7
1,1,ARUNACHAL PRADESH,2001,33,55,0,78,3,11,0
2,2,ASSAM,2001,817,1070,59,850,4,1248,0
3,3,BIHAR,2001,888,518,859,562,21,1558,83
4,4,CHHATTISGARH,2001,959,171,70,1763,161,840,0
...,...,...,...,...,...,...,...,...,...,...
731,731,D&N Haveli,2021,1250,4083,141,2068,417,4731,4
732,732,Daman & Diu,2021,315,904,16,1851,10,501,1
733,733,Delhi UT,2021,2,1,0,5,1,9,0
734,734,Lakshadweep,2021,0,0,0,1,1,3,0


In [15]:
# Report column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  736 non-null    int64 
 1   State       736 non-null    object
 2   Year        736 non-null    int64 
 3   Rape        736 non-null    int64 
 4   K&A         736 non-null    int64 
 5   DD          736 non-null    int64 
 6   AoW         736 non-null    int64 
 7   AoM         736 non-null    int64 
 8   DV          736 non-null    int64 
 9   WT          736 non-null    int64 
dtypes: int64(9), object(1)
memory usage: 57.6+ KB


In [17]:
# Show 5 randomly chosen rows; no random_state is passed, so the rows differ between runs.
df.sample(5)

Unnamed: 0.1,Unnamed: 0,State,Year,Rape,K&A,DD,AoW,AoM,DV,WT
634,634,Gujarat,2019,528,922,9,1048,16,3619,24
308,308,ASSAM,2010,1721,2767,175,1400,20,5410,0
32,32,LAKSHADWEEP,2001,0,0,0,0,0,0,0
561,561,Goa,2017,76,61,1,134,31,21,1
592,592,Andhra Pradesh,2018,971,697,140,4445,1802,6831,152


In [19]:
# Count missing values in each column.
df.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
State,0
Year,0
Rape,0
K&A,0
DD,0
AoW,0
AoM,0
DV,0
WT,0


In [21]:
# Count rows that exactly repeat an earlier row.
df.duplicated().sum()

0

In [23]:
# Preview the first five rows.
df.head()

Unnamed: 0.1,Unnamed: 0,State,Year,Rape,K&A,DD,AoW,AoM,DV,WT
0,0,ANDHRA PRADESH,2001,871,765,420,3544,2271,5791,7
1,1,ARUNACHAL PRADESH,2001,33,55,0,78,3,11,0
2,2,ASSAM,2001,817,1070,59,850,4,1248,0
3,3,BIHAR,2001,888,518,859,562,21,1558,83
4,4,CHHATTISGARH,2001,959,171,70,1763,161,840,0


In [25]:
# Preview the last five rows.
df.tail()

Unnamed: 0.1,Unnamed: 0,State,Year,Rape,K&A,DD,AoW,AoM,DV,WT
731,731,D&N Haveli,2021,1250,4083,141,2068,417,4731,4
732,732,Daman & Diu,2021,315,904,16,1851,10,501,1
733,733,Delhi UT,2021,2,1,0,5,1,9,0
734,734,Lakshadweep,2021,0,0,0,1,1,3,0
735,735,Puducherry,2021,2,0,2,31,3,12,0


In [27]:
# Total number of cells in the frame (rows x columns).
df.size

7360

In [29]:
# (row count, column count) of the frame.
df.shape

(736, 10)

In [31]:
# Number of axes of the frame.
df.ndim

2

In [34]:
# Data type of each column.
df.dtypes

Unnamed: 0,0
Unnamed: 0,int64
State,object
Year,int64
Rape,int64
K&A,int64
DD,int64
AoW,int64
AoM,int64
DV,int64
WT,int64


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. The method must be called; a bare `df.describe` only shows the
# bound-method repr instead of the statistics table.
df.describe()

In [39]:
# Column labels of the frame.
df.columns

Index(['Unnamed: 0', 'State', 'Year', 'Rape', 'K&A', 'DD', 'AoW', 'AoM', 'DV',
       'WT'],
      dtype='object')

In [42]:
# Distinct values of the State column, in order of first appearance.
df["State"].unique()

array(['ANDHRA PRADESH', 'ARUNACHAL PRADESH', 'ASSAM', 'BIHAR',
       'CHHATTISGARH', 'GOA', 'GUJARAT', 'HARYANA', 'HIMACHAL PRADESH',
       'JAMMU & KASHMIR', 'JHARKHAND', 'KARNATAKA', 'KERALA',
       'MADHYA PRADESH', 'MAHARASHTRA', 'MANIPUR', 'MEGHALAYA', 'MIZORAM',
       'NAGALAND', 'ODISHA', 'PUNJAB', 'RAJASTHAN', 'SIKKIM',
       'TAMIL NADU', 'TRIPURA', 'UTTAR PRADESH', 'UTTARAKHAND',
       'WEST BENGAL', 'A & N ISLANDS', 'CHANDIGARH', 'D & N HAVELI',
       'DAMAN & DIU', 'LAKSHADWEEP', 'PUDUCHERRY', 'Andhra Pradesh',
       'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Goa',
       'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir',
       'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh',
       'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland',
       'Odisha', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamil Nadu',
       'Telangana', 'Tripura', 'Uttar Pradesh', 'Uttarakhand',
       'West Bengal', 'A & N Islands', 'Chandigarh', 'D&N Haveli',
  

In [59]:
# Number of distinct values in the State column.
df["State"].nunique()

70

In [62]:
# Normalise state names so one state is not counted under several spellings.
# The raw column mixes 'ANDHRA PRADESH' with 'Andhra Pradesh', and also
# 'D & N HAVELI' with 'D&N Haveli'; title-casing alone leaves the latter pair
# distinct, so spacing around '&' is made uniform first.
# Note: str.title() lower-cases trailing capitals too ('Delhi UT' -> 'Delhi Ut').
df["State"] = (
    df["State"]
    .str.replace(r"\s*&\s*", " & ", regex=True)  # 'D&N' -> 'D & N'
    .str.title()
)

In [64]:
# Build a dropdown holding each distinct state name, preselecting the first one.
options = df["State"].unique().tolist()
dropdown = widgets.Dropdown(
    options=options,
    value=options[0],
    description='Select State',
)


def on_change(change):
    """Echo the value the dropdown has just been switched to."""
    selected = change["new"]
    print(f'Selected value: {selected}')


# Only react to changes of the widget's `value` trait.
dropdown.observe(on_change, names='value')

In [70]:
# Render the state dropdown widget.
display(dropdown)

Dropdown(description='Select State', options=('Andhra Pradesh', 'Arunachal Pradesh', 'Assam', 'Bihar', 'Chhatt…

In [76]:
# Build a dropdown holding each distinct year in the data, preselecting the
# first one encountered.
options = list(df.Year.unique())
dropdown_year = widgets.Dropdown(
    options=options,
    value=options[0],
    description='Select Year'
)
def on_change(change):
    """Print the newly selected year whenever the dropdown value changes."""
    print(f'Selected value: {change["new"]}')
# Attach the callback to the year dropdown itself. Observing `dropdown` (the
# state widget) here would leave the year widget without a handler and make
# the state widget report every change twice.
dropdown_year.observe(on_change, names='value')

In [77]:
# Render the year dropdown widget.
display(dropdown_year)

Dropdown(description='Select Year', options=(2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,…