References: https://github.com/blockchain-etl/ethereum-etl

# Part I: Install and Import Required Dependencies

In [1]:
# import libraries to print data frame
import pandas as pd
import numpy as np

# Part II: Query Data and Export CSV

## 2.1 Import the downloaded data

In [3]:
# Daily ETH/USD price history (Date, Open, High, Low, Close, Adj Close, Volume),
# read directly from the raw CSV hosted in the project's GitHub repository.
eth_csv_url = "https://raw.githubusercontent.com/Rising-Stars-by-Sunshine/Haowen-STATS201-Final/main/data/Prediction/ETHUSD.csv"
df = pd.read_csv(eth_csv_url)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2015/8/7,2.83162,3.53661,2.52112,2.77212,2.77212,164329.0
1,2015/8/8,2.79376,2.79881,0.714725,0.753325,0.753325,674188.0
2,2015/8/9,0.706136,0.87981,0.629191,0.701897,0.701897,532170.0
3,2015/8/10,0.713989,0.729854,0.636546,0.708448,0.708448,405283.0
4,2015/8/11,0.708087,1.13141,0.663235,1.06786,1.06786,1463100.0


## 2.2 View the basic info and fill missing values

In [4]:
# Summarise the raw frame: column names, dtypes and non-null counts.
# Columns whose non-null count is below the row count contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2267 entries, 0 to 2266
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       2267 non-null   object 
 1   Open       2262 non-null   float64
 2   High       2262 non-null   float64
 3   Low        2262 non-null   float64
 4   Close      2262 non-null   float64
 5   Adj Close  2262 non-null   float64
 6   Volume     2262 non-null   float64
dtypes: float64(6), object(1)
memory usage: 124.1+ KB


In [5]:
# The Date strings are year/month/day (e.g. "2015/8/7"), so parse them with an
# explicit format. `dayfirst=True` contradicts that layout and can make the
# parser swap day and month whenever the day number is 12 or less.
df["Date"] = pd.to_datetime(df["Date"], format="%Y/%m/%d")

# Index by date; set_index returns a new frame, so `df` keeps its RangeIndex.
df_1 = df.set_index("Date")
df_1.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-07,2.83162,3.53661,2.52112,2.77212,2.77212,164329.0
2015-08-08,2.79376,2.79881,0.714725,0.753325,0.753325,674188.0
2015-08-09,0.706136,0.87981,0.629191,0.701897,0.701897,532170.0
2015-08-10,0.713989,0.729854,0.636546,0.708448,0.708448,405283.0
2015-08-11,0.708087,1.13141,0.663235,1.06786,1.06786,1463100.0


In [6]:
# Preview the first five rows of the date-indexed frame.
df_1.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-07,2.83162,3.53661,2.52112,2.77212,2.77212,164329.0
2015-08-08,2.79376,2.79881,0.714725,0.753325,0.753325,674188.0
2015-08-09,0.706136,0.87981,0.629191,0.701897,0.701897,532170.0
2015-08-10,0.713989,0.729854,0.636546,0.708448,0.708448,405283.0
2015-08-11,0.708087,1.13141,0.663235,1.06786,1.06786,1463100.0


In [7]:
# Flag every date on which at least one column is missing, then list those rows.
has_missing = df_1.isna().any(axis="columns")
print(df_1.loc[has_missing])

            Open  High  Low  Close  Adj Close  Volume
Date                                                 
2020-04-17   NaN   NaN  NaN    NaN        NaN     NaN
2020-10-09   NaN   NaN  NaN    NaN        NaN     NaN
2020-10-12   NaN   NaN  NaN    NaN        NaN     NaN
2020-10-13   NaN   NaN  NaN    NaN        NaN     NaN
2021-10-19   NaN   NaN  NaN    NaN        NaN     NaN


In [8]:
# Fill each missing row with the next available observation (back-fill).
# `DataFrame.bfill()` replaces the deprecated `fillna(method="bfill")` spelling
# and produces the same result.
# NOTE(review): back-filling copies a later day's values into the gap; if this
# series feeds a forecasting model, forward-fill may be the safer choice — confirm.
df_2 = df_1.bfill()

# Verify that no missing values remain in any column.
df_2.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

# Part III: Save the Necessary Queried Data

In [9]:
# Order the rows by the Date index (ascending) before saving.
df_3 = df_2.sort_index()

In [10]:
# Write the cleaned, date-sorted frame to CSV in the current working directory;
# the Date index is written as the first column.
df_3.to_csv("Ethereum_value.csv")

## 3.3 Data Storage Solutions




### 3.3.1 Save Directly to GitHub


https://raw.githubusercontent.com/Rising-Stars-by-Sunshine/stats201-prediction-Haowen/main/data/Queried_Data/Ethereum_value.csv

### 3.3.2 Save the Data as a .pkl file to reduce size and save on GitHub

In [11]:
# Serialise the same frame as a pickle file in the current working directory.
df_3.to_pickle('Ethereum_value.pkl')

Notes: 
*   The .pkl file may need to be loaded with the same Python version that was used to generate it.



In [12]:
# Round-trip check: load the pickle back from disk and preview the first rows.
pickle_path = 'Ethereum_value.pkl'
df_3 = pd.read_pickle(pickle_path)
df_3.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-08-07,2.83162,3.53661,2.52112,2.77212,2.77212,164329.0
2015-08-08,2.79376,2.79881,0.714725,0.753325,0.753325,674188.0
2015-08-09,0.706136,0.87981,0.629191,0.701897,0.701897,532170.0
2015-08-10,0.713989,0.729854,0.636546,0.708448,0.708448,405283.0
2015-08-11,0.708087,1.13141,0.663235,1.06786,1.06786,1463100.0
