In [None]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.stats import ttest_1samp
from sklearn.linear_model import LinearRegression

In [None]:
# Read the crime records (the CSV is Latin-1 encoded) and preview the first rows
data = pd.read_csv(
    'crime.csv',
    encoding='latin-1',
)
data.head(n=5)

In [None]:
# Data grooming: replace missing SHOOTING values with 'N'.
# The filled column is assigned back rather than calling
# `data.SHOOTING.fillna(..., inplace=True)`: that form mutates a temporary
# Series, which pandas reports as chained assignment and which leaves `data`
# untouched once Copy-on-Write is active. Re-running this cell is harmless.
data['SHOOTING'] = data['SHOOTING'].fillna('N')

In [None]:
# Number of recorded crimes for each hour of the day
sns.catplot(
    data=data,
    x='HOUR',
    kind='count',
    height=8.27,
    aspect=3,
)
# Enlarge tick and axis labels so they stay readable on the wide figure
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('Hour', fontsize=40)
plt.ylabel('Count', fontsize=40)

In [None]:
# Number of recorded crimes for each day of the week
sns.catplot(
    data=data,
    x='DAY_OF_WEEK',
    kind='count',
    height=8.27,
    aspect=3,
)
# Enlarge tick and axis labels so they stay readable on the wide figure
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('Day of the Week', fontsize=40)
plt.ylabel('Count', fontsize=40)

In [None]:
# Number of recorded crimes for each month
sns.catplot(
    data=data,
    x='MONTH',
    kind='count',
    height=8.27,
    aspect=3,
)
# Enlarge tick and axis labels so they stay readable on the wide figure
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)
plt.xlabel('Month', fontsize=40)
plt.ylabel('Count', fontsize=40)

In [None]:
# Horizontal count plot of offense groups; bars follow the order returned by
# value_counts() on the same column
offense_order = data['OFFENSE_CODE_GROUP'].value_counts().index
sns.catplot(
    data=data,
    y='OFFENSE_CODE_GROUP',
    kind='count',
    order=offense_order,
    height=10,
    aspect=1.5,
)

In [None]:
# Horizontal count plot of the SHOOTING flag; bars follow the order returned by
# value_counts() on the same column
shooting_order = data['SHOOTING'].value_counts().index
sns.catplot(
    data=data,
    y='SHOOTING',
    kind='count',
    order=shooting_order,
    height=8,
    aspect=1.5,
)

In [None]:
# Compare two empirical-CDF estimates of a standard-normal sample that has one
# draw per row of `data`.
X = len(data.MONTH)  # sample size
rng = np.random.default_rng(42)  # fixed seed so a re-run reproduces the same curves
Z = rng.normal(size=X)

# Method 1: cumulative sum of a 10-bin density histogram times the bin width.
# `density=True` replaces the old `normed` keyword, which current NumPy no
# longer accepts.
H, X1 = np.histogram(Z, bins=10, density=True)
dx = X1[1] - X1[0]
F1 = np.cumsum(H) * dx

# Method 2: sorted sample plotted against the evenly spaced ranks i / X.
X2 = np.sort(Z)
F2 = np.arange(X) / float(X)

# X1 holds the bin edges; F1[i] is the mass accumulated up to the right edge of bin i
plt.plot(X1[1:], F1, label='histogram (10 bins)')
plt.plot(X2, F2, label='sorted sample')
plt.xlabel('z')
plt.ylabel('Cumulative probability')
plt.title('Empirical CDF of a standard-normal sample')
plt.legend()
plt.show()

In [None]:

# Scatter of the incident hour against the shooting flag.
# The columns are passed explicitly: the bare strings 'HOUR' and 'SHOOTING'
# would be plotted as one categorical point instead of the data.
# Missing flags are filled with 'N' on a copy (`data` is not modified), because
# a categorical axis cannot mix NaN with strings.
plt.scatter(data['HOUR'], data['SHOOTING'].fillna('N'))
plt.xlabel('HOUR')
plt.ylabel('SHOOTING')
# The previous title described car prices and did not belong to this dataset
plt.title('Shooting flag by hour of day')

In [None]:
# Grid of pairwise plots across the columns of `data` that seaborn can plot
sns.pairplot(data=data)

In [None]:

# One-sample t-test: does the mean MONTH of the records differ from 7?
# The column itself is passed in; the bare string 'MONTH' is not numeric data,
# so np.mean and ttest_1samp fail on it.
month_values = data['MONTH'].dropna()  # drop NaN so it cannot propagate into the p-value
data_mean = np.mean(month_values)
print(data_mean)
tset, pval = ttest_1samp(month_values, 7)
print('p-values', pval)
# 95% confidence level
if pval < 0.05:    # alpha value is 0.05 or 5%
    print("rejecting")
else:
    print("accepting")

In [None]:
# One-sample t-test on the day of the week, with Saturday as the hypothesised mean.
# A t-test needs numbers, so day names are encoded as Monday=0 ... Sunday=6 and
# the hypothesised value is Saturday's code; the bare strings 'DAY_OF_WEEK' and
# 'Saturday' cannot be averaged or tested.
# NOTE(review): assumes DAY_OF_WEEK holds full English day names - confirm.
# NOTE(review): weekday codes are ordinal and cyclic, so read this mean with care.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
             'Friday', 'Saturday', 'Sunday']
day_to_code = {name: code for code, name in enumerate(day_names)}
# Names missing from the mapping become NaN and are dropped
day_codes = data['DAY_OF_WEEK'].map(day_to_code).dropna()
data_mean = np.mean(day_codes)
print(data_mean)
tset, pval = ttest_1samp(day_codes, day_to_code['Saturday'])
print('p-values', pval)
# 95% confidence level
if pval < 0.05:    # alpha value is 0.05 or 5%
    print("rejecting")
else:
    print("accepting")

In [None]:
# One-sample t-test: does the mean HOUR of the records differ from 15?
# The column itself is passed in (not the string 'HOUR'), and the hypothesised
# mean is the number 15 rather than the string '15'.
hour_values = data['HOUR'].dropna()  # drop NaN so it cannot propagate into the p-value
data_mean = np.mean(hour_values)
print(data_mean)
tset, pval = ttest_1samp(hour_values, 15)
print('p-values', pval)
# 95% confidence level
if pval < 0.05:    # alpha value is 0.05 or 5%
    print("rejecting")
else:
    print("accepting")

In [None]:
# Linear regression of the (numerically encoded) day of the week on the month.
# The columns are read from `data`; the string literals 'MONTH' and
# 'DAY_OF_WEEK' have no reshape() and hold no data.
# NOTE(review): assumes DAY_OF_WEEK holds full English day names - confirm.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
             'Friday', 'Saturday', 'Sunday']
day_to_code = {name: code for code, name in enumerate(day_names)}
# Keep month and day code side by side so rows with a missing value in either
# column are dropped together
regression_rows = pd.DataFrame({
    'month': data['MONTH'],
    'day_code': data['DAY_OF_WEEK'].map(day_to_code),
}).dropna()
# scikit-learn expects 2-D feature input, hence the (-1, 1) reshape
MonthX = regression_rows['month'].to_numpy().reshape((-1, 1))
DayOFWeekY = regression_rows['day_code'].to_numpy().reshape((-1, 1))
# Fit once; the earlier duplicate fit was redundant
model = LinearRegression().fit(MonthX, DayOFWeekY)
r_sq = model.score(MonthX, DayOFWeekY)
print('coefficient of determination:', r_sq)