# **Bitcoin price prediction - Final predictions**
### Big Data Computing final project - A.Y. 2022 - 2023
Prof. Gabriele Tolomei

MSc in Computer Science

La Sapienza, University of Rome

### Author
Corsi Danilo - corsi.1742375@studenti.uniroma1.it



# Global constants, dependencies, libraries and tools

In [1]:
# Main constants
GDRIVE_DIR = "/content/drive"
SLOW_OPERATIONS = True

In [2]:
###################
# --- DATASET --- #
###################

# Datasets dirs
GDRIVE_DATASET_OUTPUT_DIR = GDRIVE_DIR + "/MyDrive/BDC/project/datasets/output"

# Datasets names
DATASET_TEST_NAME = "bitcoin_blockchain_data_30min_test"

# Datasets paths
GDRIVE_DATASET_TEST = GDRIVE_DATASET_OUTPUT_DIR + "/" + DATASET_TEST_NAME + ".parquet"

####################
# --- FEATURES --- #
####################

# Features dir
GDRIVE_FEATURES_DIR = GDRIVE_DIR + "/MyDrive/BDC/project/features"

# Features labels
FEATURES_LABEL = "features"
TARGET_LABEL = "next-market-price"
CHOSEN_FEATURES_LABEL = "most_corr_features"
FEATURES_NORMALIZATION = True

# Features paths
GDRIVE_CHOSEN_FEATURES = GDRIVE_FEATURES_DIR + "/" + CHOSEN_FEATURES_LABEL + ".json"

##################
# --- MODELS --- #
##################

# Model names
LR_MODEL_NAME = "LinearRegression"
GLR_MODEL_NAME = "GeneralizedLinearRegression"
RF_MODEL_NAME = "RandomForestRegressor"
GBT_MODEL_NAME = "GBTRegressor"

# Model dir
GDRIVE_MODELS_DIR = GDRIVE_DIR + "/MyDrive/BDC/project/models"

# Model path
GDRIVE_LR_MODEL = GDRIVE_MODELS_DIR + "/" + LR_MODEL_NAME
GDRIVE_GLR_MODEL = GDRIVE_MODELS_DIR + "/" + GLR_MODEL_NAME
GDRIVE_RF_MODEL = GDRIVE_MODELS_DIR + "/" + RF_MODEL_NAME
GDRIVE_GBT_MODEL = GDRIVE_MODELS_DIR + "/" + GBT_MODEL_NAME

#####################
# --- UTILITIES --- #
#####################

# Utilities dir
GDRIVE_UTILITIES_DIR = GDRIVE_DIR + "/MyDrive/BDC/project/utilities"

###################
# --- RESULTS --- #
###################

# Results dir
GDRIVE_RESULTS_DIR = GDRIVE_DIR + "/MyDrive/BDC/project/results"

# Model results path
GDRIVE_LR_MODEL_RESULTS = GDRIVE_RESULTS_DIR + "/" + LR_MODEL_NAME + ".csv"
GDRIVE_GLR_MODEL_RESULTS = GDRIVE_RESULTS_DIR + "/" + GLR_MODEL_NAME + ".csv"
GDRIVE_RF_MODEL_RESULTS = GDRIVE_RESULTS_DIR + "/" + RF_MODEL_NAME + ".csv"
GDRIVE_GBT_MODEL_RESULTS = GDRIVE_RESULTS_DIR + "/" + GBT_MODEL_NAME + ".csv"

# Final results path
GDRIVE_FINAL_RESULTS  = GDRIVE_RESULTS_DIR + "/final.csv"

In [3]:
# Point Colaboratory to Google Drive
from google.colab import drive

# Define GDrive paths
drive.mount(GDRIVE_DIR, force_remount=True)

Mounted at /content/drive


In [4]:
# Suppression of warnings for better reading
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# Install Spark and related dependencies
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.4.1.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.1-py2.py3-none-any.whl size=311285387 sha256=df2ff5dc8cd17616ae8e9540e1eef9b50e33c8170dbb59e8ea4cc3afd527d498
  Stored in directory: /root/.cache/pip/wheels/0d/77/a3/ff2f74cc9ab41f8f594dabf0579c2a7c6de920d584206e0834
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.1


# Import files

In [6]:
# Import my files
import sys
sys.path.append(GDRIVE_UTILITIES_DIR)

from imports import *
import utilities

importlib.reload(utilities)

<module 'utilities' from '/content/drive/MyDrive/BDC/project/utilities/utilities.py'>

# Create the pyspark session

In [7]:
# Create the session
conf = SparkConf().\
                set('spark.ui.port', "4050").\
                set('spark.executor.memory', '4G').\
                set('spark.driver.memory', '45G').\
                set('spark.driver.maxResultSize', '10G').\
                set("spark.kryoserializer.buffer.max", "1G").\
                setAppName("BitcoinPricePrediction").\
                setMaster("local[*]")

# Create the context
sc = pyspark.SparkContext(conf=conf)
spark = SparkSession.builder.getOrCreate()

# Loading dataset

In [8]:
# Load datasets into pyspark dataset objects
df = spark.read.load(GDRIVE_DATASET_TEST,
                         format="parquet",
                         sep=",",
                         inferSchema="true",
                         header="true"
                    )

In [9]:
def dataset_info(dataset):
  # Print dataset
  dataset.show(3)

  # Get the number of rows
  num_rows = dataset.count()

  # Get the number of columns
  num_columns = len(dataset.columns)

  # Print the shape of the dataset
  print("Shape:", (num_rows, num_columns))

  # Print the schema of the dataset
  dataset.printSchema()

In [10]:
if SLOW_OPERATIONS:
  dataset_info(df)

+-------------------+------+------------------+--------------------+--------------------+--------------------+---------------+------------------+--------------------+------------------------+-------------------+------------------+--------------------+--------------------+------------------+-----------------+--------------------------------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+
|          timestamp|    id|      market-price|      total-bitcoins|          market-cap|        trade-volume|    blocks-size|    avg-block-size|n-transactions-total|n-transactions-per-block|          hash-rate|        difficulty|      miners-revenue|transaction-fees-usd|n-unique-addresses|   n-transactions|estimated-transaction-volume-usd|      rate-of-change|        sma-5-days|        sma-7-days|       sma-10-days|       sma-20-days|       sma-50-days|      sma-100-days| next-market-price|
+-----

# Split test dataset

In [11]:
# Retrieve the last value of the timestamp column
first_timestamp = df.select(col("timestamp")).first()[0]

# Split the test set into mini-sets of 1 week, 15 days, 1 month, and 3 months
one_week_df = df.filter(col("timestamp") <= first_timestamp + relativedelta(weeks=1))
fifteen_days_df = df.filter(col("timestamp") <= first_timestamp + relativedelta(days=15))
one_month_df = df.filter(col("timestamp") <= first_timestamp + relativedelta(months=1))
three_months_df = df.filter(col("timestamp") <= first_timestamp + relativedelta(months=3))

# Load models

In [13]:
# Load models saved in Google Drive
lr = PipelineModel.load(GDRIVE_LR_MODEL)
glr = PipelineModel.load(GDRIVE_GLR_MODEL)
rf = PipelineModel.load(GDRIVE_RF_MODEL)
gbt = PipelineModel.load(GDRIVE_GBT_MODEL)

Py4JJavaError: ignored

In [40]:
# Load models results
lr_results = pd.read_csv(GDRIVE_LR_MODEL_RESULTS)
glr_results = pd.read_csv(GDRIVE_GLR_MODEL_RESULTS)
rf_results = pd.read_csv(GDRIVE_RF_MODEL_RESULTS)
gbt_results = pd.read_csv(GDRIVE_GBT_MODEL_RESULTS)

In [41]:
# Concatenate results into Pandas Dataframe
model_results_df = pd.DataFrame(pd.concat([lr_results, glr_results, rf_results, gbt_results], ignore_index=True))

# Filter the rows where "Type" is "final_validated"
model_results_df = model_results_df[model_results_df['Type'] == 'final_validated']
model_results_df

Unnamed: 0,Model,Type,Features,Parameters,RMSE,MSE,MAE,MAPE,R2,Adjusted_R2,Time
9,LinearRegression,final_validated,most_corr_features,"[5, 0.6, 0.4]",2033.437044,4134866.0,1531.694671,0.07503,0.548514,0.548408,1.237691
20,GeneralizedLinearRegression,final_validated,most_corr_features,"[5, 0.2, 'gaussian', 'log']",7917.172891,62681630.0,6693.013194,0.33997,-2.394061,-2.394846,0.644553
31,RandomForestRegressor,final_validated,most_corr_features,"[30, 10]",1801.828082,3246584.0,1469.033233,0.06993,0.824205,0.824165,15.17469
35,GBTRegressor,final_validated,more_rel_features,"[10, 5, 0.3]",1948.869171,3798091.0,1560.728697,0.078437,0.791548,0.7915,8.099548


# Load chosen features

In [None]:
# Load choosen features
with open(GDRIVE_CHOSEN_FEATURES, "r") as f:
    CHOSEN_FEATURES = json.load(f)
print(CHOSEN_FEATURES)

['market-price', 'market-cap', 'miners-revenue', 'sma-5-days', 'sma-7-days', 'sma-10-days', 'estimated-transaction-volume-usd', 'n-transactions-total', 'blocks-size', 'sma-100-days', 'total-bitcoins', 'sma-20-days']


# Test models

In [None]:
'''
Description: Evaluate final model by making predictions on the test set
Args:
    dataset: The dataSet which needs to be splited
    dataset_name: Name of selected dataset [one_week | fifteen_days | one_month | three_months]
    model: Trained model
    model_name: Model name selected
    features_normalization: Indicates whether features should be normalized (True) or not (False)
    features: Features to be used to make predictions
    features_name: Name of features used
    features_label: The column name of features
    target_label: The column name of target variable
Return:
    results_df: Results obtained from the evaluation
    predictions: Predictions obtained from the model
'''
def evaluate_final_model(dataset, dataset_name, model, model_name, features_normalization, features, features_label, target_label):
    # Select the type of features to be used
    dataset = utilities.select_features(dataset, features_normalization, features, features_label, target_label)

    # Chain assembler and model in a Pipeline
    pipeline = Pipeline(stages=[model])

    # Train a model and calculate running time
    start = time.time()
    pipeline_model = pipeline.fit(dataset)
    end = time.time()

    # Make predictions
    predictions = pipeline_model.transform(dataset).select(target_label, "prediction", 'timestamp')

    # Compute validation error by several evaluators
    eval_res = utilities.model_evaluation(target_label, predictions)

    # Use dict to store each result
    results = {
        "Model": model_name,
        "Dataset": dataset_name,
        "RMSE": eval_res['rmse'],
        "MSE": eval_res['mse'],
        "MAE": eval_res['mae'],
        "MAPE": eval_res['mape'],
        "R2": eval_res['r2'],
        "Adjusted_R2": eval_res['adj_r2'],
        "Time": end - start,
    }

    # Transform dict to pandas dataset
    results_pd = pd.DataFrame(results, index=[0])

    return results_pd, predictions

In [None]:
# Linear regression
lr_res_one_week, lr_pred_one_week = evaluate_final_model(one_week_df, "one_week", lr, LR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
lr_res_fifteen_days, lr_pred_fifteen_days = evaluate_final_model(fifteen_days_df, "fifteen_days", lr, LR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
lr_res_one_month, lr_pred_one_month = evaluate_final_model(one_month_df, "one_month", lr, LR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
lr_res_three_months, lr_pred_three_months = evaluate_final_model(three_months_df, "three_months", lr, LR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)

In [None]:
# Generalized linear regression
glr_res_one_week, glr_pred_one_week = evaluate_final_model(one_week_df, "one_week", glr, GLR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
glr_res_fifteen_days, glr_pred_fifteen_days = evaluate_final_model(fifteen_days_df, "fifteen_days", glr, GLR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
glr_res_one_month, glr_pred_one_month = evaluate_final_model(one_month_df, "one_month", glr, GLR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
glr_res_three_months, glr_pred_three_months = evaluate_final_model(three_months_df, "three_months", glr, GLR_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)

In [None]:
# Random forest regressor
rf_res_one_week, rf_pred_one_week = evaluate_final_model(one_week_df, "one_week", rf, RF_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
rf_res_fifteen_days, rf_pred_fifteen_days = evaluate_final_model(fifteen_days_df, "fifteen_days", rf, RF_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
rf_res_one_month, rf_pred_one_month = evaluate_final_model(one_month_df, "one_month", rf, RF_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
rf_res_three_months, rf_pred_three_months = evaluate_final_model(three_months_df, "three_months", rf, RF_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)

In [None]:
# Gradient boosting tree regressor
gbt_res_one_week, gbt_pred_one_week = evaluate_final_model(one_week_df, "one_week", gbt, GBT_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
gbt_res_fifteen_days, gbt_pred_fifteen_days = evaluate_final_model(fifteen_days_df, "fifteen_days", gbt, GBT_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
gbt_res_one_month, gbt_pred_one_month = evaluate_final_model(one_month_df, "one_month", gbt, GBT_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)
gbt_res_three_months, gbt_pred_three_months = evaluate_final_model(three_months_df, "three_months", gbt, GBT_MODEL_NAME, FEATURES_NORMALIZATION, CHOSEN_FEATURES, FEATURES_LABEL, TARGET_LABEL)

# Models comparison

In [None]:
def show_results(df, lr_predictions, glr_predictions, rf_predictions, gbt_predictions, title):
  trace1 = go.Scatter(
      x = df['timestamp'],
      y = df['next-market-price'].astype(float),
      mode = 'lines',
      name = 'Next Market price (usd)'
  )

  trace2 = go.Scatter(
      x = lr_predictions['timestamp'],
      y = lr_predictions['prediction'].astype(float),
      mode = 'lines',
      name = 'Linear Regression predictions'
  )

  trace2 = go.Scatter(
      x = lr_predictions['timestamp'],
      y = lr_predictions['prediction'].astype(float),
      mode = 'lines',
      name = 'Linear Regression predictions'
  )

  trace3 = go.Scatter(
      x = glr_predictions['timestamp'],
      y = glr_predictions['prediction'].astype(float),
      mode = 'lines',
      name = 'Generalized Linear Regression predictions'
  )

  trace4 = go.Scatter(
      x = rf_predictions['timestamp'],
      y = rf_predictions['prediction'].astype(float),
      mode = 'lines',
      name = 'Random Forest Regressor predictions'
  )

  trace5 = go.Scatter(
      x = gbt_predictions['timestamp'],
      y = gbt_predictions['prediction'].astype(float),
      mode = 'lines',
      name = 'GBTRegressor predictions'
  )

  layout = dict(
      title=title + " predictions",
      xaxis=dict(
          rangeselector=dict(
              buttons=list([
                  # Change the count to desired amount of months.
                  dict(count=1,
                      label='1m',
                      step='month',
                      stepmode='backward'),
                  dict(count=6,
                      label='6m',
                      step='month',
                      stepmode='backward'),
                  dict(count=12,
                      label='1y',
                      step='month',
                      stepmode='backward'),
                  dict(count=36,
                      label='3y',
                      step='month',
                      stepmode='backward'),
                  dict(step='all')
              ])
          ),
          rangeslider=dict(
              visible = True
          ),
          type='date'
      )
  )

  data = [trace1,trace2,trace3, trace4, trace5]
  fig = dict(data=data, layout=layout)
  iplot(fig, filename = title + " predictions")

In [None]:
# Define model information and evaluators to show
model_info = ['Model', "Dataset"]
evaluator_lst = ['RMSE', 'MSE', 'MAE', 'MAPE', 'R2', 'Adjusted_R2', 'Time']

# Define the results to show
one_week_comparison_lst = [lr_res_one_week, glr_res_one_week, rf_res_one_week, gbt_res_one_week]
fifteen_days_comparison_lst = [lr_res_fifteen_days, glr_res_fifteen_days, rf_res_fifteen_days, gbt_res_fifteen_days]
one_month_comparison_lst = [lr_res_one_month, glr_res_one_month, rf_res_one_month, gbt_res_one_month]
three_months_comparison_lst = [lr_res_three_months, glr_res_three_months, rf_res_three_months, gbt_res_three_months]

In [None]:
# # Show the one week comparison table
# pd.concat([utilities.model_comparison(cv_result, model_info, evaluator_lst) for cv_result in one_week_comparison_lst])

Unnamed: 0,Model,Dataset,RMSE,MSE,MAE,MAPE,R2,Adjusted_R2,Time
0,LinearRegression,one_week,2727.735549,7440541.0,2567.881738,0.093499,-56.902597,-57.424242,0.000944
0,GeneralizedLinearRegression,one_week,3122.767744,9751678.0,2799.731689,0.101989,-74.887961,-75.571637,0.000164
0,RandomForestRegressor,one_week,529.141116,279990.3,372.995424,0.013581,-1.178896,-1.198526,0.000127
0,GBTRegressor,one_week,642.342188,412603.5,485.513868,0.017625,-2.210897,-2.239824,0.000143


In [None]:
show_results(one_week_df.toPandas(), lr_pred_one_week.toPandas(), glr_pred_one_week.toPandas(), rf_pred_one_week.toPandas(), gbt_pred_one_week.toPandas(), "One week")

In [None]:
# # Show the fifteen days comparison table
# pd.concat([utilities.model_comparison(cv_result, model_info, evaluator_lst) for cv_result in fifteen_days_comparison_lst])

Unnamed: 0,Model,Dataset,RMSE,MSE,MAE,MAPE,R2,Adjusted_R2,Time
0,LinearRegression,fifteen_days,2408.925187,5802921.0,2239.092952,0.082791,-14.915846,-14.982439,0.000151
0,GeneralizedLinearRegression,fifteen_days,2955.457822,8734731.0,2714.64019,0.100702,-22.957011,-23.057249,0.000139
0,RandomForestRegressor,fifteen_days,867.251223,752124.7,660.656258,0.02487,-1.062875,-1.071506,0.000138
0,GBTRegressor,fifteen_days,704.503546,496325.2,549.870574,0.020535,-0.361286,-0.366982,0.000126


In [None]:
show_results(fifteen_days_df.toPandas(), lr_pred_fifteen_days.toPandas(), glr_pred_fifteen_days.toPandas(), rf_pred_fifteen_days.toPandas(), gbt_pred_fifteen_days.toPandas(), "Fifteen days")

In [None]:
# # Show the one month comparison table
# pd.concat([utilities.model_comparison(cv_result, model_info, evaluator_lst) for cv_result in one_month_comparison_lst])

Unnamed: 0,Model,Dataset,RMSE,MSE,MAE,MAPE,R2,Adjusted_R2,Time
0,LinearRegression,one_month,2709.021967,7338800.0,2407.097801,0.085841,-1.739208,-1.744741,0.000149
0,GeneralizedLinearRegression,one_month,2956.568088,8741295.0,2743.838284,0.099606,-2.262689,-2.26928,8.1e-05
0,RandomForestRegressor,one_month,1245.727276,1551836.0,1008.049379,0.036032,0.420777,0.419607,0.00016
0,GBTRegressor,one_month,962.930881,927235.9,816.603424,0.029649,0.653909,0.65321,0.000135


In [None]:
show_results(one_month_df.toPandas(), lr_pred_one_month.toPandas(), glr_pred_one_month.toPandas(), rf_pred_one_month.toPandas(), gbt_pred_one_month.toPandas(), "One month")

In [None]:
# # Show the three months comparison table
# pd.concat([utilities.model_comparison(cv_result, model_info, evaluator_lst) for cv_result in three_months_comparison_lst])

Unnamed: 0,Model,Dataset,RMSE,MSE,MAE,MAPE,R2,Adjusted_R2,Time
0,LinearRegression,three_months,3400.25672,11561750.0,3142.659574,0.107802,-2.851975,-2.854594,0.000128
0,GeneralizedLinearRegression,three_months,3293.583983,10847700.0,3019.917126,0.104945,-2.614078,-2.616536,0.000134
0,RandomForestRegressor,three_months,1732.434531,3001329.0,1558.576642,0.054001,6.1e-05,-0.000619,0.000151
0,GBTRegressor,three_months,1229.49672,1511662.0,1037.599677,0.036544,0.496366,0.496024,0.000129


In [None]:
show_results(three_months_df.toPandas(), lr_pred_three_months.toPandas(), glr_pred_three_months.toPandas(), rf_pred_three_months.toPandas(), gbt_pred_three_months.toPandas(), "Three months")

# Summary

In [None]:
def scatter_plot(dataset, x_axis, y_axis, title):
  fig = px.scatter(dataset, x=x_axis, y=y_axis, color=y_axis)
  fig.update_layout(title=title)
  fig.update_yaxes(showticklabels=False)
  fig.update_yaxes(title='')
  fig.show()

In [17]:
# # Define the final comparison table to show
# comparison_lst = [lr_res_one_week, glr_res_one_week, rf_res_one_week, gbt_res_one_week,
#                   lr_res_fifteen_days, glr_res_fifteen_days, rf_res_fifteen_days, gbt_res_fifteen_days,
#                   lr_res_one_month, glr_res_one_month, rf_res_one_month, gbt_res_one_month,
#                   lr_res_three_months, glr_res_three_months, rf_res_three_months, gbt_res_three_months
#                   ]

# # Show the final comparison table
# comparison_lst_df = pd.concat([utilities.model_comparison(cv_result, model_info, evaluator_lst) for cv_result in comparison_lst])
# comparison_lst_df

NameError: ignored

In [None]:
# model_results = pd.concat([lr_results, glr_results, rf_results, gbt_results], ignore_index=True)
# model_results

In [None]:
# Plot RMSE value for each final model
scatter_plot(comparison_lst_df, "RMSE", "Model", "RMSE value for each final model")

In [None]:
def scatter_plot(dataset, x_axis, y_axis, legend, title):
  fig = px.scatter(dataset, x=x_axis, y=y_axis, color=legend)
  fig.update_layout(title=title)
  fig.show()

In [None]:
scatter_plot(comparison_lst_df, "RMSE", "Dataset", "Model", "RMSE value for each model (and dataset)")

In [None]:
comparison_lst_df.to_csv(GDRIVE_FINAL_RESULTS, index=False)