In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
import numpy as np
import warnings
import sys
import math

warnings.filterwarnings('ignore')

sys.path.append("..")

In [2]:
data_cl = pd.read_csv(f'../data/csgo_taskv2.csv')
data_rg = pd.read_csv(f'../data/trip_duration_taskv2.csv')

Data_CL

In [3]:
data_cl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122410 entries, 0 to 122409
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Unnamed: 0        122410 non-null  int64  
 1   time_left         122410 non-null  float64
 2   ct_score          122410 non-null  int64  
 3   t_score           122410 non-null  int64  
 4   map               122410 non-null  object 
 5   bomb_planted      122410 non-null  bool   
 6   ct_health         122410 non-null  int64  
 7   t_health          122410 non-null  int64  
 8   ct_armor          122410 non-null  int64  
 9   t_armor           122410 non-null  int64  
 10  ct_money          122410 non-null  int64  
 11  t_money           122410 non-null  int64  
 12  ct_helmets        122410 non-null  int64  
 13  t_helmets         122410 non-null  int64  
 14  ct_defuse_kits    122410 non-null  int64  
 15  ct_players_alive  122410 non-null  int64  
 16  t_players_alive   12

In [4]:
data_cl = data_cl.drop(['map'], axis=1)

In [5]:
data_cl.isna().sum()

Unnamed: 0          0
time_left           0
ct_score            0
t_score             0
bomb_planted        0
ct_health           0
t_health            0
ct_armor            0
t_armor             0
ct_money            0
t_money             0
ct_helmets          0
t_helmets           0
ct_defuse_kits      0
ct_players_alive    0
t_players_alive     0
dtype: int64

In [6]:
data_major_true = data_cl[data_cl['bomb_planted'] == True]
data_major_false = data_cl[data_cl['bomb_planted'] == False]

data_major_false_undersampled = data_major_false.sample(len(data_major_true))
data_cl = pd.concat([data_major_false_undersampled, data_major_true], axis = 0)

y_cl = data_cl['bomb_planted']
for unique in y_cl.unique():
    print(unique, y_cl.loc[y_cl == unique].count())

False 13684
True 13684


In [7]:
x_cl = data_cl.drop(['bomb_planted'], axis=1)

In [8]:
x_train_cl, x_test_cl, y_train_cl, y_test_cl = train_test_split(x_cl, y_cl, test_size=0.3)

Data_RG

In [9]:
data_rg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 729322 entries, 0 to 729321
Data columns (total 11 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Unnamed: 0         729322 non-null  int64  
 1   id                 729322 non-null  object 
 2   vendor_id          729322 non-null  int64  
 3   passenger_count    729322 non-null  int64  
 4   pickup_longitude   729322 non-null  float64
 5   pickup_latitude    727475 non-null  float64
 6   dropoff_longitude  729322 non-null  float64
 7   dropoff_latitude   729322 non-null  float64
 8   trip_duration      729322 non-null  int64  
 9   pickup_datetime    729322 non-null  object 
 10  dropoff_datetime   729322 non-null  object 
dtypes: float64(4), int64(4), object(3)
memory usage: 61.2+ MB


In [10]:
data_rg.isna().sum()

Unnamed: 0              0
id                      0
vendor_id               0
passenger_count         0
pickup_longitude        0
pickup_latitude      1847
dropoff_longitude       0
dropoff_latitude        0
trip_duration           0
pickup_datetime         0
dropoff_datetime        0
dtype: int64

In [11]:
mean_r = -0.0008561123804995453
for i in range(len(data_rg['pickup_latitude'])):
    if math.isnan(data_rg['pickup_latitude'].loc[data_rg.index[i]]):
        data_rg['pickup_latitude'].loc[data_rg.index[i]] = data_rg['dropoff_latitude'].loc[data_rg.index[i]] + mean_r
data_rg.isna().sum()

Unnamed: 0           0
id                   0
vendor_id            0
passenger_count      0
pickup_longitude     0
pickup_latitude      0
dropoff_longitude    0
dropoff_latitude     0
trip_duration        0
pickup_datetime      0
dropoff_datetime     0
dtype: int64

In [12]:
data_rg = data_rg.drop(['id'], axis=1)
data_rg = data_rg.drop(['pickup_datetime'], axis=1)
data_rg = data_rg.drop(['dropoff_datetime'], axis=1)

In [13]:
y_rg = data_rg['trip_duration']
x_rg = data_rg.drop(['trip_duration'], axis=1)

In [14]:
x_rg = x_rg[:10000]
y_rg = y_rg[:10000]

In [15]:
x_train_rg, x_test_rg, y_train_rg, y_test_rg = train_test_split(x_rg, y_rg, test_size=0.3)

In [16]:
# Regression
model_regression = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(7,)),
        tf.keras.layers.Dense(32, activation="linear"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(1, activation="linear"),
    ]
)
model_regression.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                512       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 3137 (12.25 KB)
Trainable params: 3137 (12

In [17]:
# compile
model_regression.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), loss="mse")

In [18]:
model_regression.fit(x_train_rg, y_train_rg, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x2b99f9af6a0>

In [20]:
print(mean_absolute_error(y_test_rg, model_regression.predict(x_test_rg)))
print(mean_squared_error(y_test_rg, model_regression.predict(x_test_rg)))

491.56150551350913
3900742.8817113885


In [23]:
model_classification_1 = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(15,)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

model_classification_1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="binary_crossentropy")
model_classification_1.fit(x_train_cl, y_train_cl, epochs=100, verbose=None)

<keras.src.callbacks.History at 0x2b999a595d0>

In [24]:
y_pred = np.around(model_classification_1.predict(x_test_cl, verbose=None))

print(classification_report(y_test_cl, y_pred))
print(confusion_matrix(y_test_cl, y_pred))

              precision    recall  f1-score   support

       False       0.97      0.93      0.95      4064
        True       0.93      0.97      0.95      4147

    accuracy                           0.95      8211
   macro avg       0.95      0.95      0.95      8211
weighted avg       0.95      0.95      0.95      8211

[[3764  300]
 [ 106 4041]]


In [26]:
w0 = 1 / y_train_cl[y_train_cl==0].shape[0]
w1 = 1 / y_train_cl[y_train_cl==1].shape[0]

In [30]:
model_classification_1 = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_dim = x_cl.shape[1]),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

model_classification_1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="binary_crossentropy")
model_classification_1.fit(x_train_cl, y_train_cl, epochs=50, verbose=None, class_weight={0: w0, 1: w1})
model_classification_1.predict(x_test_cl, verbose=None)

y_pred = np.around(model_classification_1.predict(x_test_cl, verbose=None))
print(classification_report(y_test_cl, y_pred))
print(confusion_matrix(y_test_cl, y_pred))

TypeError: Value passed to parameter 'x' has DataType bool not in list of allowed values: bfloat16, float16, float32, float64, int8, int16, int32, int64, complex64, complex128

In [None]:
y_pred = np.around(model_classification_1.predict(x_test_c, verbose=None))
print(classification_report(y_test_c, y_pred))
print(confusion_matrix(y_test_c, y_pred))

In [31]:
# 2
model_classification_2 = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_dim = x_cl.shape[1]),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(2, activation="softmax"),
    ]
)

model_classification_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), loss="sparse_categorical_crossentropy")
model_classification_2.fit(x_train_cl, y_train_cl, epochs=100, verbose=None)

<keras.src.callbacks.History at 0x2b998e10910>

In [32]:
y_pred = [np.argmax(pred) for pred in model_classification_2.predict(x_test_cl, verbose=None)]
print(classification_report(y_test_cl, y_pred))
print(confusion_matrix(y_test_cl, y_pred))

              precision    recall  f1-score   support

       False       0.49      1.00      0.66      4064
        True       0.00      0.00      0.00      4147

    accuracy                           0.49      8211
   macro avg       0.25      0.50      0.33      8211
weighted avg       0.24      0.49      0.33      8211

[[4064    0]
 [4147    0]]
