In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab


# Load the incident log; the 'Dates' column is parsed and used as a DatetimeIndex.
data = pd.read_csv('C:\\Users\\rohit\\Downloads\\Book2.csv', parse_dates=['Dates'], index_col='Dates')

# Calendar features derived from the datetime index (dayofweek: Monday=0 ... Sunday=6).
# These are added BEFORE the per-category frames are sliced out below: a boolean
# filter returns a new frame, so columns added to `data` afterwards would not
# show up in `fraud`, `assault`, ... and grouping those frames by 'DayOfWeek'
# or 'Month' would not see the values computed here.
data['DayOfWeek'] = data.index.dayofweek
data['Hour'] = data.index.hour
data['Month'] = data.index.month
data['Year'] = data.index.year
data['DayOfMonth'] = data.index.day

# One frame per crime category of interest
fraud = data[data['Category'] == "FRAUD"]
assault = data[data['Category'] == "ASSAULT"]
FamilyOffenses = data[data['Category'] == "FAMILY OFFENSES"]
OtherOffenses = data[data['Category'] == "OTHER OFFENSES"]
vandalism = data[data['Category'] == "VANDALISM"]

#structure size
pylab.rcParams['figure.figsize'] = (16.0, 12.0)

#use of library ggplot
plt.style.use('ggplot')

daysOfWeekLit = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Fixed 0..6 axis (Monday=0) so every series has one slot per weekday,
# even if some weekday never occurs in the data.
daysOfWeekIdx = np.arange(len(daysOfWeekLit))
occursByWeek = (data.groupby(data.index.dayofweek).size()
                .reindex(daysOfWeekIdx, fill_value=0).values)


# Bar plot: one bar group per weekday, one bar per crime category.
# To plot another category (e.g. `vandalism`) append its frame and label here.
categoryFrames = [fraud, assault, FamilyOffenses, OtherOffenses]
categoryLabels = ('Fraud', 'Assault', 'Family Offenses', 'Other Offenses')
width = 0.1

# The grid has three columns, so the axes spans all three.
ax2 = plt.subplot2grid((3,3), (1,0), colspan=3)

# Count incidents per weekday straight from each frame's datetime index and
# align the counts to 0..6, filling weekdays without incidents with 0.
y = np.zeros([len(categoryFrames), len(daysOfWeekLit)])
for i, frame in enumerate(categoryFrames):
    y[i] = (frame.groupby(frame.index.dayofweek).size()
            .reindex(daysOfWeekIdx, fill_value=0).values)


color_sequence = ['#1f77b4', '#ff7f0e', '#2ca02c','#d62728', '#9467bd', '#8c564b']

# Only iterate over rows that actually hold data; each bar is shifted by
# `width` so the categories sit side by side within a weekday.
h = [None] * len(categoryFrames)
for i in range(len(categoryFrames)):
    h[i] = ax2.bar(daysOfWeekIdx + i*width, y[i], width, color=color_sequence[i], alpha = 0.7)

# Put the weekday label under the middle of each bar group.
ax2.set_xticks(daysOfWeekIdx + (len(categoryFrames) - 1) * width / 2)
ax2.set_xticklabels(daysOfWeekLit)
# ensure that ticks are only at the bottom and left parts of the plot
ax2.get_xaxis().tick_bottom()
ax2.get_yaxis().tick_left()

ax2.legend([item[0] for item in h],
           categoryLabels,
           bbox_to_anchor=(0.0, 1), loc=3, borderaxespad=0., frameon=False)


plt.title('DayOfWeek wise occurence Crimes', fontsize = 28)
plt.show()

In [None]:
pylab.rcParams['figure.figsize'] = (16.0, 8.0)

monthsLit = ['January', 'February', 
             'March', 'April', 'May', 
             'June', 'July','August', 
             'September', 'October', 'Novemeber', 'December']
# Zero-based x positions, one per calendar month.
monthsIdx = np.arange(len(monthsLit))
# Incidents per month, aligned to months 1..12 (months without data count as 0).
occursByMonth = (data.groupby('Month').size()
                 .reindex(monthsIdx + 1, fill_value=0).values)

# Linear plot for all crimes
ax1 = plt.subplot2grid((3,3), (0,0), colspan=3)
ax1.plot(monthsIdx, occursByMonth, 'ro-', linewidth=2)

ax1.set_title ('All Crimes', fontsize=20)

# Kept as notebook-level variables in case other cells still read them.
start, end = ax1.get_xlim()
# One tick per plotted month. Ticks derived from the padded axis limits would
# not sit on the data points and would not match the 12 labels in number.
ax1.xaxis.set_ticks(monthsIdx)
ax1.set_xticklabels(monthsLit)
# ensure that ticks are only at the bottom and left parts of the plot
ax1.get_xaxis().tick_bottom()
ax1.get_yaxis().tick_left()


In [None]:
# Linear normalized plot for the selected crime categories
ax2 = plt.subplot2grid((3,3), (1,0), colspan=3, rowspan=2)

crimes = ['fraud', 'Assault', 'Family Offenses', 'Other Offenses']
color_sequence = ['#1f77b4', '#ff7f0e', '#2ca02c','#d62728']
# Frames listed in the same order as `crimes` / `color_sequence`.
categoryFrames = [fraud, assault, FamilyOffenses, OtherOffenses]
monthNumbers = np.arange(1, 13)   # calendar months 1..12
monthPositions = monthNumbers - 1  # zero-based x positions

y = np.zeros([len(categoryFrames), len(monthNumbers)])
h = [None] * len(categoryFrames)

for i, frame in enumerate(categoryFrames):
    # Count incidents per month from the frame's datetime index, aligned to
    # 1..12 so a month without incidents becomes 0 instead of a missing slot.
    counts = (frame.groupby(frame.index.month).size()
              .reindex(monthNumbers, fill_value=0).values)
    # Min-max normalization to [0, 1]; a flat series maps to 0 rather than 0/0.
    span = counts.max() - counts.min()
    y[i] = (counts - counts.min()) / span if span > 0 else 0.0
    h[i] = ax2.plot(monthPositions, y[i],'o-', color=color_sequence[i], lw=2)

ax2.set_ylabel("Crime occurences by month, normalized")

# One tick per month so the 12 labels line up with the 12 data points.
ax2.xaxis.set_ticks(monthPositions)
ax2.set_xticklabels(monthsLit)

ax2.legend([item[0] for item in h],
           crimes, 
           bbox_to_anchor=(0.87, 1), loc=2, borderaxespad=0., frameon=False)

pylab.gcf().text(0.5, 1.00, 
            'Month wise Occurence of Crimes',
            horizontalalignment='center',
            verticalalignment='top', 
             fontsize = 28)
plt.show()

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
from datetime import datetime
import re 
from patsy import dmatrices
from sklearn.metrics import accuracy_score,classification_report
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import log_loss


#reading the training data set
SNF1 = pd.read_excel("C:\\Users\\rohit\\Downloads\\crime_and_day.xlsx")
# First seven columns are the model inputs. Take an explicit copy: later cells
# assign into SNF, and assigning into a slice of SNF1 triggers pandas'
# SettingWithCopyWarning.
SNF = SNF1.iloc[:, 0:7].copy()
# Columns 7 and 8 are the targets, kept as a NumPy array.
y = SNF1.iloc[:,7:9].values

In [None]:
# Preview the first rows of the raw sheet to sanity-check the load.
SNF1.head()

In [None]:
# Standardise the coordinate columns (zero mean, unit variance).
scaler = preprocessing.StandardScaler()
xy_columns = ["X", "Y"]
SNF[xy_columns] = scaler.fit_transform(SNF[xy_columns])
# Keep rows whose standardised Y is below 100 in magnitude, then renumber 0..n-1.
# NOTE(review): rows removed here are not removed from `y`, which was taken
# from SNF1 earlier -- confirm features and targets still line up afterwards.
SNF = SNF[SNF["Y"].abs() < 100].reset_index(drop=True)
print(SNF.index)

In [None]:
#feature normalization
def normalize(data): 
    """Min-max scale ``data`` to the range [0, 1].

    Parameters
    ----------
    data : pandas.Series, numpy.ndarray or pandas.DataFrame
        Numeric values. Anything supporting ``.min()``, ``.max()`` and
        arithmetic works.

    Returns
    -------
    Same container type as ``data`` holding ``(data - min) / (max - min)``.
    When every value is identical (scalar range of zero) the result is all
    zeros instead of the NaNs that 0/0 would produce.
    """
    lowest = data.min()
    span = data.max() - lowest
    # Only guard the scalar-range case; a DataFrame yields a per-column range
    # and keeps the plain element-wise division.
    if np.ndim(span) == 0 and span == 0:
        return (data - lowest) * 0.0
    return (data - lowest) / span

In [None]:
# Rescale both coordinate columns to [0, 1], one column at a time.
for coord in ('X', 'Y'):
    SNF[coord] = normalize(SNF[coord])

In [None]:
# Inspect the rescaled X column.
print(SNF['X'])

In [None]:
def parse_time(x):
    """Split a timestamp into its calendar components.

    Parameters
    ----------
    x : datetime.datetime (or subclass) or str-convertible
        Datetime objects are read directly. Anything else is converted with
        ``str`` and must look like ``"YYYY-MM-DD HH:MM:SS"``.

    Returns
    -------
    tuple of (hour, day, month, year)

    Raises
    ------
    ValueError
        If a non-datetime value does not match ``"%Y-%m-%d %H:%M:%S"``.
    """
    # Read datetime objects directly instead of round-tripping through str(),
    # which fails as soon as the value carries fractional seconds or a tz
    # offset. `x == x` keeps missing timestamps (pandas NaT compares unequal
    # to itself) on the strptime path, so they still raise as before.
    if isinstance(x, datetime) and x == x:
        DD = x
    else:
        DD = datetime.strptime(str(x), "%Y-%m-%d %H:%M:%S")
    return DD.hour, DD.day, DD.month, DD.year

In [None]:
#getting season : summer, fall, winter, spring from months column
def get_season(x):
    """Return one-hot season flags for a month number.

    Parameters
    ----------
    x : int
        Month number.

    Returns
    -------
    tuple of int
        ``(summer, fall, winter, spring)``, each 0 or 1.

    Notes
    -----
    Month 12 is treated as winter. Previously only 0 was listed, so a
    December value of 12 matched no season at all. 0 is still accepted.
    NOTE(review): the groupings (5-7 summer, 8-10 fall, ...) look like they
    were written for zero-based months -- confirm against the caller, which
    passes ``datetime.month`` values.
    """
    summer = int(x in (5, 6, 7))
    fall = int(x in (8, 9, 10))
    winter = int(x in (11, 12, 0, 1))
    spring = int(x in (2, 3, 4))
    return summer, fall, winter, spring

def preprocess_data(df):
    """Build the numeric feature matrix from the raw incident frame.

    Steps, in order: discard ``Id``/``Descript``/``Resolution`` when present,
    derive hour/day/month/year and season flags from ``Dates``, standardise
    ``X``/``Y`` and zero out values more than 5 standard deviations away,
    flag addresses containing "/", one-hot encode district, hour, day, month
    and year, and finally drop the raw columns that were encoded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Dates`` (values accepted by ``parse_time``), ``X``,
        ``Y``, ``Address`` (strings) and ``PdDistrict``. The frame itself is
        not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed 0..n-1. Holds any other input columns unchanged, plus ``X``,
        ``Y``, the season flags, ``Addr`` and the dummy columns.
    """
    ignored = ("Id", "Descript", "Resolution")
    feature_list = [name for name in df.columns.tolist() if name not in ignored]

    # Work on an explicit copy: the assignments below would otherwise target a
    # frame derived by indexing and trigger pandas' SettingWithCopyWarning.
    cleanData = df[feature_list].copy()
    cleanData.index = range(len(df))

    print ("Parsing dates...")
    # parse_time returns (hour, day, month, year); zip(*...) transposes the
    # per-row tuples into four column-length sequences.
    cleanData["Time"], cleanData["Day"], cleanData["Month"], cleanData["Year"] = \
        zip(*cleanData["Dates"].apply(parse_time))

    print ("Creating season features...")
    cleanData["Summer"], cleanData["Fall"], cleanData["Winter"], cleanData["Spring"] = \
        zip(*cleanData["Month"].apply(get_season))

    print("Creating Lat/Long feature...")
    xy_scaler = preprocessing.StandardScaler()
    xy_scaler.fit(cleanData[["X","Y"]])
    cleanData[["X","Y"]] = xy_scaler.transform(cleanData[["X","Y"]])
    #set outliers to 0
    cleanData["X"] = cleanData["X"].apply(lambda x: 0 if abs(x)>5 else x)
    cleanData["Y"] = cleanData["Y"].apply(lambda y: 0 if abs(y)>5 else y)

    print ("Creating address features...")
    #recoding address as 0: if no interaction , 1: if interaction
    cleanData["Addr"] = cleanData["Address"].apply(lambda x: 1 if "/" in x else 0)

    print ("Creating dummy variables...")
    PD = pd.get_dummies(cleanData['PdDistrict'], prefix='PD')
    TIME = pd.get_dummies(cleanData['Time'],prefix='HOUR')
    Day = pd.get_dummies(cleanData['Day'],prefix='DAY')
    Month = pd.get_dummies(cleanData['Month'],prefix='MONTH')
    Year = pd.get_dummies(cleanData['Year'],prefix='YEAR')

    print ("Joining features...")
    features = pd.concat([cleanData, PD, TIME, Day, Month, Year], axis=1)

    print ("Droping processed columns...")
    # The raw columns are redundant once their encoded versions are attached.
    cleanFeatures = features.drop(
        ["PdDistrict","Address","Dates","Time","Day","Month","Year"], axis=1)

    print('Done!')

    return cleanFeatures

# Build the model's feature matrix from the scaled/filtered frame.
features = preprocess_data(SNF)

In [None]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Replace the values in column 1 of `y` with integer class codes.
labelencoder = LabelEncoder()
label_column = y[:, 1]
y[:, 1] = labelencoder.fit_transform(label_column)

In [None]:
# Encoder configured for column 0; it is not applied anywhere in the visible cells.
# NOTE(review): `categorical_features` was deprecated in scikit-learn 0.20 and
# removed in 0.22 -- on newer versions this line raises TypeError; a
# ColumnTransformer is the documented replacement. Confirm the target version.
onehotencoder = OneHotEncoder(categorical_features = [0])

In [None]:
# Wrap the target array in a DataFrame (default 0..n-1 index, integer column names).
y = pd.DataFrame(y)
print(y)

In [None]:
from sklearn.model_selection import train_test_split
# Split features and targets in ONE call so both receive exactly the same row
# permutation. With equal-length inputs this yields the same partitions as two
# separate calls with the same random_state; with unequal lengths it raises
# immediately instead of silently producing misaligned X/y pairs.
X_train, X_test, y1_train, y1_test = train_test_split(
    features, y, test_size = 0.3, random_state = 0)

In [None]:
# Inspect the training feature matrix.
print(X_train)

In [None]:
# Inspect the training targets.
print(y1_train)

In [None]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense

# Initialising the ANN
classifier = Sequential()

# Width of the input layer follows the training matrix instead of a hard-coded
# column count, so the network keeps matching the data when the set of
# dummy columns produced by preprocessing changes.
n_features = X_train.shape[1]

# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 50, kernel_initializer = 'uniform', activation = 'relu', input_dim = n_features))

# Adding the second hidden layer
classifier.add(Dense(units = 50, kernel_initializer = 'uniform', activation = 'relu'))

# Adding the output layer (two softmax units)
classifier.add(Dense(units = 2, kernel_initializer = 'uniform', activation = 'softmax'))

# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])


In [None]:
# Fitting the ANN to the Training set
# Row label 10779 is excluded from the test targets.
# NOTE(review): the reason for excluding this row is not visible here -- confirm.
# errors="ignore" makes the cell safe to re-run: once the row is gone (or if it
# never landed in the test split) a plain drop would raise KeyError.
y1_test = y1_test.drop([10779], errors="ignore")
history = classifier.fit(X_train, y1_train, batch_size = 10, epochs = 60)
# Class probabilities for the held-out rows, thresholded to booleans at 0.5.
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

In [None]:
print(history.history.keys())
# Older Keras releases log the accuracy metric as 'acc', newer ones as
# 'accuracy'; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
#  "Accuracy"
plt.plot(history.history[acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# Only the training curve is plotted (fit was called without validation data),
# so the legend names just that one line.
plt.legend(['train'], loc='upper left')
plt.show()
# "Loss"
plt.plot(history.history['loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()

In [None]:
# Overlay the held-out targets and the thresholded predictions on one axes.
fig, ax = plt.subplots()
for series in (y1_test, y_pred):
    ax.plot(series)