In [34]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [35]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [36]:
# Device configuration: use the CUDA device when torch reports one as available,
# otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [37]:
# Hyper-parameters
# NOTE(review): none of these values are consumed by the cells visible here; the
# grouping below is inferred from the names only — confirm against the model and
# training cells.

# -- input shape
sequence_length = 12
input_size = 6

# -- network size
hidden_size = 32
num_layers = 2
num_classes = 2

# -- optimisation
batch_size = 64
num_epochs = 2
learning_rate = 0.05

In [38]:
# Load the data set: the training and test tables are read from CSV files that
# are resolved relative to the current working directory.
df_train = pd.read_csv(filepath_or_buffer="data_train.csv")
df_test = pd.read_csv(filepath_or_buffer="data_test.csv")

In [39]:
# Drop the velocity columns from both the training and the test frame.
#
# errors="ignore" keeps this cell re-runnable: because the result is assigned
# back to the same names, a second execution would otherwise raise KeyError
# (the columns are already gone after the first run).
velocity_columns = ["vmax", "vmin", "vmean"]

df_train = df_train.drop(columns=velocity_columns, errors="ignore")
df_test = df_test.drop(columns=velocity_columns, errors="ignore")

In [40]:
# Labelling helper for the rows of data_train.csv.

# Bounds of the target rectangle, ordered [x_min, x_max, y_min, y_max]; these are
# the same four numbers label_df compares against.
# NOTE(review): torch.tensor stores Python floats in torch's default float dtype
# (float32 unless it was changed), so the decimals are rounded in this tensor.
center = torch.tensor([3750901.5068, 3770901.5068, -19268905.6133, -19208905.6133])


def label_df(df_train):
    """Return 1 when a row's exit point lies strictly inside the target rectangle, else 0.

    Parameters
    ----------
    df_train : mapping-like row (e.g. the Series that ``DataFrame.apply(..., axis=1)``
        passes in) providing scalar ``'x_exit'`` and ``'y_exit'`` values.
        Despite its name, this is a single row, not the whole frame.

    Returns
    -------
    int
        1 if ``x_exit`` and ``y_exit`` are both strictly between their bounds,
        0 otherwise (points on the border and NaN coordinates yield 0).
    """
    x_low, x_high = 3750901.5068, 3770901.5068
    y_low, y_high = -19268905.6133, -19208905.6133

    # Check x first; y_exit is only looked up when x already qualifies.
    exit_x = df_train['x_exit']
    if not (x_low < exit_x < x_high):
        return 0

    exit_y = df_train['y_exit']
    if not (y_low < exit_y < y_high):
        return 0

    return 1

# Build the 'label' column: label_df is called once per row (axis='columns' is
# the named form of axis=1) and its 0/1 result is stored on df_train.
df_train['label'] = df_train.apply(func=label_df, axis='columns')

In [41]:
# data preprocessing
#   - mean-normalise the columns ["x_entry", "y_entry", "x_exit", "y_exit"]:
#         (value - mean) / (max - min)
#   - convert the time strings (xx:yy:zz) into integers by removing the colons
#   - mean-normalise the two time columns the same way
#   TODO: the centre rectangle (`center`) is NOT normalised here; if it is needed
#         in normalised coordinates, apply the same mean/range of x_exit / y_exit.


def _mean_normalize(values):
    """Return ``(values - mean) / (max - min)`` for a numeric pandas Series.

    The denominator is the scalar range of the column. A constant column
    (range 0) is returned merely centred, i.e. as zeros, instead of dividing
    by zero.
    """
    centered = values - values.mean()
    spread = values.max() - values.min()
    if spread == 0:
        return centered
    return centered / spread


def _clock_to_int(clock_strings):
    """Turn 'xx:yy:zz' strings into the integer xxyyzz by removing the colons.

    NOTE(review): this encoding is not linear in elapsed time (e.g. 00:59:59 ->
    5959 but 01:00:00 -> 10000); it is kept as the original did it — confirm
    whether seconds-since-midnight would be preferable.
    """
    return clock_strings.str.replace(":", "", regex=False).astype(int)


# Work on a copy so df_train keeps the raw values and this cell can be re-run.
df_train_norm = df_train.copy()

# Each normalised series is computed from its OWN source column.
norm_x_entry = _mean_normalize(df_train["x_entry"])
norm_y_entry = _mean_normalize(df_train["y_entry"])
norm_x_exit = _mean_normalize(df_train["x_exit"])
norm_y_exit = _mean_normalize(df_train["y_exit"])

df_train_norm["x_entry"] = norm_x_entry
df_train_norm["y_entry"] = norm_y_entry
df_train_norm["x_exit"] = norm_x_exit
df_train_norm["y_exit"] = norm_y_exit

# Times: strip the colons, cast to int, then normalise like the coordinates.
norm_time_entry = _mean_normalize(_clock_to_int(df_train["time_entry"]))
norm_time_exit = _mean_normalize(_clock_to_int(df_train["time_exit"]))
df_train_norm["time_entry"] = norm_time_entry
df_train_norm["time_exit"] = norm_time_exit


In [44]:
# Write the normalised training frame to CSV (the index is written as well,
# which is the pandas default).
# NOTE(review): the destination is an absolute, machine-specific path and the
# file name is spelled "normalied"; both are left untouched in case other code
# reads this exact location.
df_train_norm.to_csv(path_or_buf=r"/Users/qing/School Study/Machine Learning/final project/normalied_train_data.csv")