# San Francisco Crime Classification Problem  

The problem is described in detail on [the Kaggle competition page](https://www.kaggle.com/c/sf-crime).  
This notebook presents one solution to it.

### Setting the environment for the experiment

In [1]:
# Render matplotlib figures inline, directly in the notebook output.
%matplotlib inline

In [2]:
# Import needed modules.
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd

### Read data from the given files

In [3]:
# Load the training and test sets from CSV files in the current working directory.
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv')
)

### Setting features to use

In [4]:
# Name of the target column the classifier learns to predict.
label_col = "Category"
# Names of the input columns fed to the classifier.
feature_cols = ["X", "Y"]

### Setting up and training the model

In [5]:
# Fixed seed so that the forest, and therefore the submission, is reproducible
# when the notebook is re-run from a fresh kernel.
RANDOM_SEED = 0

model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
model.fit(train_data[feature_cols], train_data[label_col])

# One row per test sample and one probability column per class; the columns
# follow the order of model.classes_.
prediction = model.predict_proba(test_data[feature_cols])

In [6]:
# Distinct categories found in the training labels, in ascending order.
# NOTE(review): these are later used as the column names of `prediction`, which
# assumes this order matches the classifier's class order — confirm against
# model.classes_.
distinct_categories = pd.unique(train_data[label_col])
labels = sorted(distinct_categories)

### Making a data frame instance to submit

In [7]:
# Wrap the class probabilities in a frame with one column per category and
# name the (default integer) row index "id".
submit = pd.DataFrame(prediction, columns=labels).rename_axis("id")

### Evaluating the model with k-fold cross-validation

In [8]:
from sklearn.metrics import log_loss
# `sklearn.cross_validation` was removed in scikit-learn 0.20; KFold now lives
# in `sklearn.model_selection` and yields indices through `.split()`.
from sklearn.model_selection import KFold


def multiclass_logloss_score(model, features, labels, num_folds=5):
    """Return the mean multi-class log loss of `model` over k-fold cross-validation.

    The rows are split into `num_folds` consecutive (unshuffled) folds. For each
    fold the model is fitted on the remaining rows and its class probabilities
    for the held-out rows are scored with `sklearn.metrics.log_loss`.

    Parameters
    ----------
    model : estimator
        Object providing `fit`, `predict_proba` and, once fitted, `classes_`.
        It is refitted in place, so after the call it holds the fit from the
        last fold.
    features : pandas.DataFrame
        Feature rows; indexed positionally with `.iloc`.
    labels : pandas.Series
        Target value for each row of `features`; indexed positionally with `.iloc`.
    num_folds : int, default 5
        Number of folds.

    Returns
    -------
    float
        The per-fold log loss averaged over all folds.
    """
    # Every class present anywhere in the data. Each fold is scored against this
    # full set so that folds missing a class are still evaluated consistently.
    all_classes = np.unique(labels)
    kfolds = KFold(n_splits=num_folds)

    total_score = 0.0

    for train_index, test_index in kfolds.split(features):
        train_features = features.iloc[train_index]
        test_features = features.iloc[test_index]
        train_labels = labels.iloc[train_index]
        test_labels = labels.iloc[test_index]

        model.fit(train_features, train_labels)

        # predict_proba only has columns for the classes seen in this training
        # fold (model.classes_); add zero-probability columns for the rest so
        # the columns line up with `all_classes`.
        fold_proba = pd.DataFrame(
            model.predict_proba(test_features), columns=model.classes_
        ).reindex(columns=all_classes, fill_value=0.0)

        # Passing `labels` explicitly keeps log_loss from inferring the class
        # set from the held-out labels, which may not contain every class.
        total_score += log_loss(test_labels, fold_proba.values, labels=all_classes)

    return total_score / num_folds

### Saving the result data frame to the submission file.

In [9]:
from time import localtime, strftime

# Timestamp the file name so that successive runs do not overwrite each other.
current_time = strftime("%Y.%m.%d %H.%M.%S", localtime())
submission_path = "RandomForestClassifier %s.csv" % current_time

submit.to_csv(submission_path)