# ICO Fraud detection - First Exploratory Data Analysis (EDA)
### Objective:
The goal of this notebook is to explore the ICO data through a series of aggregations and visualizations.


In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
import plotly.offline as py
import plotly.graph_objs as go
#py.init_notebook_mode(connected=True)

## 1. Reading the data files

In [None]:
# Folder of CSV files for the scam/fraud class and its directory listing.
scam_folder_path = '../data_and_models/lista_ico/ico_scam/tokens/'
scam_file_list = os.listdir(scam_folder_path)

# The notebook refers to this data set under both "scam" and "fraud" names;
# bind both families to the same objects so every cell resolves and the
# file names always line up with the DataFrames loaded from them.
fraud_folder_path = scam_folder_path
fraud_file_list = scam_file_list

# Folder of CSV files for the success (non-fraud) class and its listing.
success_folder_path = '../data_and_models/lista_ico/ico_success/tokens/'
success_file_list = os.listdir(success_folder_path)


In [None]:
# Preview the first ten entries of the scam file listing.
scam_file_list[:10]


In [None]:
# Preview the first ten entries of the success file listing.
success_file_list[:10]

## 2. Loading one file

In [None]:
# Read the first scam file on its own to inspect its layout.
first_scam_csv = f'{scam_folder_path}/{scam_file_list[0]}'
df_test = pd.read_csv(first_scam_csv)
df_test.head()

In [None]:
# Number of rows in the sample file.
len(df_test.index)

## 3. Loading Multiple files

In [None]:
# NOTE(review): this list is initialised empty here, while the loading cell of
# this notebook fills `list_multiple_files_fraud` instead -- confirm whether
# this name is still needed.
list_multiple_files_scam = []

### 3.1. Appending the DataFrame instances to a list

In [None]:
%%time
list_multiple_files_fraud= [pd.read_csv(f'{scam_folder_path}/{file}') for file in scam_file_list]
list_multiple_files_success = [pd.read_csv(f'{success_folder_path}/{file}') for file in success_file_list]

In [None]:
# Number of fraud DataFrames that were loaded.
len(list_multiple_files_fraud)

In [None]:
# Number of entries in the success file listing (file names, not DataFrames).
len(success_file_list)

### 3.2. Create DataFrame with features for ICOs

In [None]:
# One [ico_name, n_rows, fraud_flag] record per fraud ICO (fraud_flag = 1).
# The records are driven by the DataFrames that were actually loaded
# (`list_multiple_files_fraud`, read from `scam_file_list` in the same order);
# iterating over the never-filled `list_multiple_files_scam` produced no rows.
# os.path.splitext removes only the file extension, whereas str.strip('.csv')
# removes any of the characters '.', 'c', 's', 'v' from BOTH ends of the name.
list_len_dfs = [
    [os.path.splitext(file_name)[0], df.shape[0], 1]
    for file_name, df in zip(scam_file_list, list_multiple_files_fraud)
]

In [None]:
# Append one [ico_name, n_rows, fraud_flag] record per success ICO
# (fraud_flag = 0). os.path.splitext removes only the file extension, whereas
# str.strip('.csv') removes any of the characters '.', 'c', 's', 'v' from BOTH
# ends of the name and so mangles names that start or end with those letters.
list_len_dfs.extend(
    [os.path.splitext(file_name)[0], df.shape[0], 0]
    for file_name, df in zip(success_file_list, list_multiple_files_success)
)

In [None]:
# Total number of size records collected so far.
len(list_len_dfs)

In [None]:
# Turn the [name, row count, label] records into a DataFrame.
agg_columns = ['ico', 'size', 'fraud']
df_agg = pd.DataFrame(list_len_dfs, columns=agg_columns)
df_agg.head()

In [None]:
import plotly.express as px

# Box plot of the per-ICO row count, one box per value of `fraud`,
# with every individual point drawn.
fig = px.box(
    data_frame=df_agg,
    x="fraud",
    y="size",
    points="all",
)
fig.show()

### 3.3. Visualizing ICO total time

In [None]:
# Inspect the column names of one success DataFrame (the one at index 10).
list_multiple_files_success[10].columns

In [None]:
# One [ico_name, latest timestamp, earliest timestamp, fraud_flag] record per
# success ICO (fraud_flag = 0); frames without a `block_timestamp` column are
# skipped. os.path.splitext removes only the file extension, whereas
# str.strip('.csv') removes any of the characters '.', 'c', 's', 'v' from BOTH
# ends of the name.
list_time_delta = [
    [
        os.path.splitext(file_name)[0],
        df.block_timestamp.max(),
        df.block_timestamp.min(),
        0,
    ]
    for file_name, df in zip(success_file_list, list_multiple_files_success)
    if 'block_timestamp' in df.columns
]

In [None]:
# Append one [ico_name, latest timestamp, earliest timestamp, fraud_flag]
# record per fraud ICO (fraud_flag = 1); frames without a `block_timestamp`
# column are skipped. Names are taken from `scam_file_list`, the listing that
# `list_multiple_files_fraud` was loaded from, so each name is paired with its
# own DataFrame. os.path.splitext removes only the file extension, whereas
# str.strip('.csv') removes any of the characters '.', 'c', 's', 'v' from BOTH
# ends of the name.
list_time_delta.extend(
    [
        os.path.splitext(file_name)[0],
        df.block_timestamp.max(),
        df.block_timestamp.min(),
        1,
    ]
    for file_name, df in zip(scam_file_list, list_multiple_files_fraud)
    if 'block_timestamp' in df.columns
)

In [None]:
# Turn the [name, max time, min time, label] records into a DataFrame and
# check the column types before converting them.
time_delta_columns = ['ico', 'max_time', 'min_time', 'fraud']
df_time_delta = pd.DataFrame(list_time_delta, columns=time_delta_columns)
df_time_delta.dtypes

In [None]:
# Convert both timestamp columns to datetime so they can be subtracted.
for time_col in ('max_time', 'min_time'):
    df_time_delta[time_col] = pd.to_datetime(df_time_delta[time_col])
df_time_delta.dtypes

In [None]:
# Span between the latest and earliest timestamp of each ICO, in whole days.
elapsed = df_time_delta['max_time'] - df_time_delta['min_time']
df_time_delta['total_days'] = elapsed.dt.days
df_time_delta.dtypes

In [None]:
# Display the full time-span table.
df_time_delta

In [None]:
# Summary statistics for the rows labelled fraud == 0.
df_time_delta.loc[df_time_delta['fraud'] == 0].describe()

In [None]:
# Summary statistics for the rows labelled fraud == 1.
df_time_delta.loc[df_time_delta['fraud'] == 1].describe()

In [None]:
import plotly.express as px

# Box plot of the per-ICO span in days, one box per value of `fraud`,
# with every individual point drawn.
fig = px.box(
    data_frame=df_time_delta,
    x="fraud",
    y="total_days",
    points="all",
)
fig.show()

### 3.4. ICO dynamics compared

In [None]:
# Look at the `value` column of the first fraud DataFrame.
list_multiple_files_fraud[0].value

In [None]:
# Overlay the `value` column of every fraud DataFrame as a line, using the
# row index as the x axis; frames without a `value` column are skipped.
fig = go.Figure()
for df in list_multiple_files_fraud:
    if 'value' not in df.columns:
        continue
    trace = go.Scatter(x=df.index, y=df['value'], mode='lines')
    fig.add_trace(trace)

# Log-scale y axis.
fig.update_layout(yaxis_type="log")
fig.show()

In [None]:
# Display the first fraud DataFrame.
list_multiple_files_fraud[0]

## 4. Creating a class for ICO analysis

In [None]:
class DeepICO:
    """Holder for the two input files of a single ICO analysis."""

    def __init__(self, smart_contract_file, token_file):
        """Store the input file references on the instance.

        Args:
            smart_contract_file: Reference to the ICO's smart-contract file;
                stored unchanged.
            token_file: Reference to the ICO's token file; stored unchanged.
        """
        self.smart_contract_file = smart_contract_file
        self.token_file = token_file