# Machine Learning

### Imports

In [None]:
import pandas as pd
import numpy as np
import requests
import json
import random

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from datetime import datetime, timedelta
from warnings import simplefilter

### 1. Reading data from CSV to DataFrame

In [None]:
# Suppress every FutureWarning raised from this point on.
simplefilter(action='ignore', category=FutureWarning)

# Load the crime fact table and discard the two columns that are not
# kept for the rest of the notebook.
df = pd.read_csv('FactCrimes.csv').drop(columns=['Neighborhood', 'DateKey'])
df.head()

### 2. Using LabelEncoder to encode IncidentCategory values as integers

In [None]:
# Replace each distinct IncidentCategory label with an integer code.
# The fitted encoder is kept in `le` so codes can be mapped back to labels.
le = LabelEncoder()
df['IncidentCategory'] = le.fit_transform(df['IncidentCategory'])
df.head()

### 3. Define X and y and perform the train/test split

In [None]:
# Features: the four weather columns. Target: the IncidentCategory column.
feature_columns = ['Temperature', 'Humidity', 'Clouds', 'Wind']
X = df.loc[:, feature_columns].values
y = df.loc[:, 'IncidentCategory'].values

# Hold out 30 % of the rows for evaluation. No random_state is passed,
# so the split is not seeded here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

### 4. Create all the models and append them to models list

In [None]:
# Regularisation strength for logistic regression; it is passed to the
# estimator as its inverse (C = 1 / reg_strength).
reg_strength = 0.02

# Candidate classifiers to compare against each other.
models = [
    KNeighborsClassifier(n_neighbors=72),  # default = 5
    DecisionTreeClassifier(max_depth=1),
    GaussianNB(),
    RandomForestClassifier(max_depth=1),
    LogisticRegression(C=1 / reg_strength, solver="liblinear"),
]

# Randomise the order in which the candidates are stored (and later trained).
random.shuffle(models)

### 5. Training and reviewing all models in a for loop

In [None]:
# Best fitted model seen so far and its accuracy on the held-out test set.
best_model = {'model': None, 'score': 0}

for model in models:
    print(f'Training model {model}')
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    # Always accept the first candidate so that 'model' cannot stay None when
    # every accuracy is 0 (with a non-empty `models` list). After that, only a
    # strictly higher accuracy replaces the current best, so ties keep the
    # model that was trained earlier.
    if best_model['model'] is None or acc > best_model['score']:
        best_model.update({'model': model, 'score': acc})
    print(f'Model accuracy: {acc} \n')

print(best_model)

### 6. A bit of time-related calculation

In [None]:
from zoneinfo import ZoneInfo

# Current wall-clock time in San Francisco, taken from the IANA time-zone
# database instead of subtracting a fixed offset from this machine's clock.
# A fixed offset is only right for one particular host time zone and is wrong
# whenever the two zones are on different sides of a daylight-saving change.
# (On systems without a tz database, zoneinfo needs the `tzdata` package.)
now_sf = datetime.now(ZoneInfo('America/Los_Angeles'))

# Zero-padded hour ("00"-"23") one hour from now; later used as the row index
# into the hourly forecast.
# NOTE(review): at 23:xx this wraps to "00", which in a single-day hourly
# forecast is the first hour of the *current* day -- confirm that is acceptable.
predict_hour = (now_sf + timedelta(hours=1)).strftime("%H")

### 7. Calling API for weather forecast and converting received JSON to DataFrame

In [None]:
# Open-Meteo hourly forecast for latitude 37.77 / longitude -122.42:
# temperature, relative humidity, cloud cover and wind speed (m/s) for one
# forecast day, with the America/Los_Angeles time zone requested.
URL = 'https://api.open-meteo.com/v1/forecast?latitude=37.77&longitude=-122.42&hourly=temperature_2m,relativehumidity_2m,cloudcover,windspeed_10m&windspeed_unit=ms&forecast_days=1&timezone=America%2FLos_Angeles'

# The timeout keeps the notebook from hanging indefinitely on a stalled
# connection. raise_for_status() surfaces HTTP errors here instead of as a
# confusing KeyError on the missing 'hourly' key below.
response = requests.get(URL, timeout=10)
response.raise_for_status()
json_data = response.json()

# Build one DataFrame row per entry of the hourly series.
hourly = json_data['hourly']
forecast = pd.DataFrame({
    'temperature': hourly['temperature_2m'],
    'humidity': hourly['relativehumidity_2m'],
    'cloud': hourly['cloudcover'],
    'wind': hourly['windspeed_10m'],
})
forecast.head()

### 8. Predicting upcoming CrimeWeather®

In [None]:
# Forecast row whose index label equals the target hour.
t = forecast.loc[int(predict_hour)]

# Single-sample feature matrix of shape (1, 4), in the same column order the
# models were trained on: temperature, humidity, clouds, wind.
f = np.array([[t.temperature, t.humidity, t.cloud, t.wind]])

# Predict with the best-scoring model and map the integer code back to the
# original category label.
model = best_model['model']
pred = model.predict(f)[0]
prediction = le.inverse_transform([pred])

# The message is Finnish for "Most likely crime".
print(f'Todennäköisin rikos: {prediction[0]}')

### 9. Resolutions