![Crime scene](https://dailytimes.com.pk/assets/uploads/2017/10/24/Crime-scene-1280x720.jpg)

# **1- Importing required libraries**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from wordcloud import WordCloud

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **2- Reading data**

In [None]:
# Load the Montreal crime CSV from the Kaggle input directory into a DataFrame.
csv_path = "/kaggle/input/montreal-crime-data/Montreal Crime Data.csv"
data = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the freshly loaded DataFrame.
data.head()

# **3- Removing unwanted column**

In [None]:
# Remove the "Unnamed: 0" column from the DataFrame in place.
data.drop(columns=["Unnamed: 0"], inplace=True)

In [None]:
# Show the column labels that remain after dropping "Unnamed: 0".
data.columns

# **4- Checking meta information of dataframe**

## **4.1- Shape of dataframe**

In [None]:
# Print the DataFrame's shape tuple as returned by data.shape.
print(f"Shape of data is: {data.shape}")

## **4.2- Datatype of each feature**

In [None]:
# Tabulate the dtype of every column: one row per column, a single 'Datatype' column.
data.dtypes.to_frame('Datatype').rename_axis('Columns')

**The date column is not stored as a datetime type, so we convert it below**

# **5- Data Cleaning**

## **5.1- Converting date feature to datetime format**

In [None]:
# Parse the 'date' column into pandas datetimes and store it back on the DataFrame.
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates

In [None]:
# Re-display the per-column dtypes after the 'date' conversion.
data.dtypes.to_frame('Datatype').rename_axis('Columns')

## **5.2- Checking for NaN values**

In [None]:
# Count missing entries per column and present them as a one-column table.
missing_per_column = data.isnull().sum()
missing_per_column.to_frame('Missing Values').rename_axis('Feature')

**So none of the columns contain NaN values**

# **6- Visualization**

## **6.1- Top 10 cities where the most crimes are reported**

In [None]:
# Count records per city and keep the ten cities with the most records.
# The column is named explicitly through to_frame() rather than by renaming
# the default value_counts() column: that default name is the source column
# name ("city") in older pandas but "count" in pandas >= 2.0, so a
# rename({"city": ...}) silently does nothing on newer versions.
top_cities = (
    data['city']
    .value_counts()
    .rename_axis("City")
    .to_frame("Case Reported")
    .head(10)
)

# Display the table with in-cell bars drawn by the pandas Styler.
top_cities.style.bar()

In [None]:
# Bar chart of the ten cities in top_cities.
# The plotted heights (and colours) are the natural log of the case counts.
log_counts = np.log(top_cities)

fig = px.bar(
    log_counts,
    x=top_cities.index,
    y='Case Reported',
    color='Case Reported',
    title="<b>Top 10 Cities where most Crime are reported</b>",
    template='plotly_dark',
)

fig.show()

In [None]:
# Pie chart of each city's share of the cases in top_cities.
label = top_cities.index
value = top_cities['Case Reported']

pie_trace = go.Pie(labels=label, values=value, rotation=90)
fig = go.Figure(data=[pie_trace])

# Centre the title and set the minimum uniform text size.
fig.update_layout(
    title_text='<b>Top 10 Cities where most Crime are reported</b>',
    title_x=0.5,
    title_font={'size': 20},
    uniformtext_minsize=15,
)

# Put percentage + label inside each slice and outline slices in black.
slice_outline = {'line': {'color': '#000000', 'width': 1.5}}
fig.update_traces(
    textposition='inside',
    textinfo='percent+label',
    marker=slice_outline,
)

fig.show()

## **6.2- Visualizing category feature**

In [None]:
# Histogram of record counts per crime category, with bars coloured by city.
fig = px.histogram(data, x='category', color='city')
fig.show()

## **6.3- Crime category in Each city**

In [None]:
# Treemap with cities at the top level and crime categories nested inside each city.
hierarchy = ['city', 'category']
fig = px.treemap(
    data,
    path=hierarchy,
    hover_data=['category'],
)

fig.show()

**From the two charts above, Montreal is the city where the most crime is reported. The crime categories in Montreal are:**
1. Home invasion
2. Theft in / from a motor vehicle
3. Mischief
4. Motor vehicle theft 
5. Confirmed Theft


## **6.4- Year in which most Crime is Reported**

In [None]:
# Count records per year and highlight the largest count.
# to_frame() names the column explicitly, so the "Case Reported" label no
# longer depends on the default value_counts() column name (which is "count"
# rather than "year" in pandas >= 2.0).
(
    data['year']
    .value_counts()
    .rename_axis("Year")
    .to_frame("Case Reported")
    .style.highlight_max()
)

**Most cases were reported in 2015**

## **6.5- Year in which the least Crime is Reported**

In [None]:
# Count records per year and highlight the smallest count.
# to_frame() names the column explicitly, so the "Case Reported" label no
# longer depends on the default value_counts() column name (which is "count"
# rather than "year" in pandas >= 2.0).
(
    data['year']
    .value_counts()
    .rename_axis("Year")
    .to_frame("Case Reported")
    .style.highlight_min()
)

**The fewest crimes were reported in 2021**

## **6.6- Trend of Crime Year wise**

In [None]:
# Histogram of records per year, one bar per crime category placed side by side.
fig = px.histogram(
    data,
    x='year',
    color="category",
    barmode='group',
)
fig.show()

## **6.7- Trend of Crime from 2015 to 2021 (whether crime has decreased or increased)**

In [None]:
# Total the 'count' column for each year.
# Only 'count' is selected before summing: summing every column depends on
# pandas silently discarding non-numeric columns, which pandas >= 2.0 no
# longer does (it raises on the datetime 'date' column instead).
# NOTE(review): assumes 'count' was the only numeric column left after the
# original dropped 'longitude' and 'latitude' — confirm against the dataset.
# The plots below only read 'Case Reported', so they are unaffected either way.
year_wise_trend = (
    data.groupby('year')[['count']]
    .sum()
    .rename(columns={'count': "Case Reported"})
)

# Display the yearly totals with in-cell bars drawn by the pandas Styler.
year_wise_trend.style.bar()

In [None]:
# Line chart of yearly case totals; each point is labelled with its year.
years = year_wise_trend.index

fig = px.line(
    year_wise_trend,
    x=years,
    y='Case Reported',
    text=years,
    template='ggplot2',
    title="<b>Trend of Crime from 2015 to 2021</b>",
)

# Place the year labels below and to the right of their points.
fig.update_traces(textposition="bottom right")
fig.show()

In [None]:
# Bar chart of yearly case totals, with bar colour also mapped to the total.
fig = px.bar(
    year_wise_trend,
    x=year_wise_trend.index,
    y='Case Reported',
    color='Case Reported',
    title="<b>Trend of Crime from 2015 to 2021</b>",
    template='plotly_dark',
)

fig.show()

### **From the insights above, the number of reported crimes decreased after 2015**

In [None]:
def plot_word_cloud(df: pd.DataFrame, col_name: str) -> None:
    """Draw a word cloud of the lower-cased values in one column.

    All values of ``df[col_name]`` are lower-cased, joined with spaces into a
    single text, and rendered with ``WordCloud`` on a black matplotlib figure.

    Args:
        df: DataFrame holding the column to visualise.
        col_name: Name of the column whose values make up the text.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Drop missing entries and cast to str before joining: ' '.join() raises
    # TypeError on NaN, and the .str accessor fails on non-string columns.
    values = df[col_name].dropna().astype(str)
    text = ' '.join(values.str.lower())

    # collocations=False keeps WordCloud from adding two-word phrases.
    wordcloud = WordCloud(width=2000, height=900,
                          background_color='black',
                          collocations=False,
                          max_words=5000,
                          min_font_size=15).generate(text)

    plt.figure(figsize=(12, 8), facecolor='k', edgecolor='k')
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()

## **7- Top words in Crime category**

In [None]:
# Word cloud built from the values of the 'category' column.
plot_word_cloud(data, 'category')

## **8- Top words in City**

In [None]:
# Word cloud built from the values of the 'city' column.
plot_word_cloud(data, 'city')

<div style="color:black;
           display:fill;
           border-radius:5px;
           background-color:#6cdcf5;
           font-size:100%;
           font-family:Verdana;
           letter-spacing:0.7px">


<h2 style="text-align:center">I Hope you like this kernel. If so, don't forget to upvote and leave your valuable comment. Thank you 😊</h2>


</div>