### Imports

In [140]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

import folium
from IPython.display import display

from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split


import seaborn as sns


In [141]:
from sklearn.preprocessing import OneHotEncoder


In [142]:
# Load the card-level reference table from the working directory. The preview
# shows its columns: credit_card, city, state, zipcode and credit_card_limit.
df_info = pd.read_csv('cc_info.csv')
df_info.head()

Unnamed: 0,credit_card,city,state,zipcode,credit_card_limit
0,1280981422329509,Dallas,PA,18612,6000
1,9737219864179988,Houston,PA,15342,16000
2,4749889059323202,Auburn,MA,1501,14000
3,9591503562024072,Orlando,WV,26412,18000
4,2095640259001271,New York,NY,10001,20000


In [143]:
# Load the transaction log from the working directory. The preview shows its
# columns: credit_card, date, transaction_dollar_amount, Long and Lat.
df_trans = pd.read_csv('transactions.csv')
df_trans.head()

Unnamed: 0,credit_card,date,transaction_dollar_amount,Long,Lat
0,1003715054175576,2015-09-11 00:32:40,43.78,-80.174132,40.26737
1,1003715054175576,2015-10-24 22:23:08,103.15,-80.19424,40.180114
2,1003715054175576,2015-10-26 18:19:36,48.55,-80.211033,40.313004
3,1003715054175576,2015-10-22 19:41:10,136.18,-80.174138,40.290895
4,1003715054175576,2015-10-26 20:08:22,71.82,-80.23872,40.166719


In [144]:
# Attach each card's transactions to its reference row. A left join keeps every
# card from df_info; both frames share the 'credit_card' key column.
df = pd.merge(df_info, df_trans, how='left', on='credit_card')

In [145]:
# Sample only 10% of the data
# random_state pins the draw so the same rows are selected on every fresh run.
# NOTE(review): this rebinds `df` to a sample of itself, so re-running this
# cell without re-running the merge cell samples 10% of the already-sampled
# frame. Re-run the merge first, or sample into a separate name.
df = df.sample(frac=0.1, random_state=14)

In [146]:
# Inspect which columns the merged, sampled frame carries.
df.columns

Index(['credit_card', 'city', 'state', 'zipcode', 'credit_card_limit', 'date',
       'transaction_dollar_amount', 'Long', 'Lat'],
      dtype='object')

In [147]:
# Count missing values per column of the merged, sampled frame.
df.isna().sum()

credit_card                  0
city                         0
state                        0
zipcode                      0
credit_card_limit            0
date                         0
transaction_dollar_amount    0
Long                         0
Lat                          0
dtype: int64

In [148]:
# Pull out the sampled transactions that belong to one specific card.
filtered_df = df.loc[df['credit_card'] == 1280981422329509]

if filtered_df.empty:
    # Nothing to plot for this card in the sampled frame.
    print("No data found for the given credit card.")
else:
    # Centre the map on the mean coordinates of this card's transactions.
    center_lat = filtered_df['Lat'].mean()
    center_long = filtered_df['Long'].mean()
    initial_location = [center_lat, center_long]
    transaction_map = folium.Map(location=initial_location, zoom_start=2)

    # One marker per transaction: hovering shows the card's home city/state,
    # clicking shows the amount and the transaction date.
    for index, row in filtered_df.iterrows():
        hover_text = f"City: {row['city']}, State: {row['state']}"
        click_text = f"Transaction: ${row['transaction_dollar_amount']} on {row['date']}"
        marker = folium.Marker(
            location=[row['Lat'], row['Long']],
            tooltip=hover_text,
            popup=click_text,
        )
        marker.add_to(transaction_map)

    display(transaction_map)

In [149]:
# Convert Date to Number
# Parse the whole `date` column in a single vectorised call instead of invoking
# pd.to_datetime once per row, then express every timestamp as whole seconds
# since the Unix epoch. Subtracting the epoch and floor-dividing by one second
# does not depend on the datetime resolution pandas chooses for the column.
unix_epoch = pd.Timestamp('1970-01-01')
unix_timestamps = (pd.to_datetime(df_trans['date']) - unix_epoch) // pd.Timedelta(seconds=1)

# Replace the raw date column with its numeric form without mutating in place.
# The new column is appended last, so the column order is unchanged:
# every remaining original column, followed by unix_timestamp.
# NOTE: `df_trans` is rebound here, so re-run the cell that loads
# transactions.csv before re-running this cell.
df_trans = df_trans.drop(columns=['date']).assign(unix_timestamp=unix_timestamps)


# Standardization
# Every remaining column is scaled to zero mean / unit variance; the fitted
# scaler stays available as `std`.
# NOTE(review): `credit_card` is an identifier, yet it is scaled here and then
# passed to the model together with the real features - confirm this is intended.
df_trans_columns = df_trans.columns
std = StandardScaler()
df_trans = pd.DataFrame(std.fit_transform(df_trans), columns=df_trans_columns)


In [150]:
# Build the isolation-forest anomaly detector and fit it on the standardized
# transaction features.
model = IsolationForest(
    n_estimators=100,       # number of trees in the ensemble
    max_samples=0.5,        # a float here is a fraction of the rows per tree
    contamination='auto',   # let the estimator pick the outlier threshold
    max_features=1.0,       # every tree may use all features
    bootstrap=False,        # sample rows without replacement
    n_jobs=None,
    verbose=1,              # emit progress output while fitting
    random_state=2020,      # fixed seed so the fitted forest is reproducible
)
model.fit(df_trans)

In [151]:
# Label every row the forest was fitted on. Per scikit-learn's IsolationForest
# convention, predict() returns 1 for inliers and -1 for outliers.
Y = model.predict(df_trans)

In [152]:
# Tally how many rows received each predicted label and print the counts.
Y_series = pd.Series(Y)
label_counts = Y_series.value_counts()
print(label_counts)

 1    282758
-1     11830
dtype: int64
