In [1]:
import pandas as pd
import numpy as np
import warnings

# Silence every warning for the rest of the session: no category, message or
# module filter is given, so the 'ignore' action applies to all of them.
# NOTE(review): this also hides any pandas / library warnings that later cells
# might raise — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
from oauth2client.service_account import ServiceAccountCredentials
import gspread

# OAuth scopes requested for the service account (Sheets feed + Drive).
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']

# Build service-account credentials from the JSON key file kept outside the
# notebook directory.
creds = ServiceAccountCredentials.from_json_keyfile_name('../google_api_key.json', scope)

# Authorize a gspread client with those credentials.
client = gspread.authorize(creds)

# Open the spreadsheet by its title.
sheet = client.open('USD-EGP Time series Data')

# Fetch all cell values of the first worksheet as a list of rows.
sheet_instance = sheet.get_worksheet(0)
records_data = sheet_instance.get_values()

# Build the frame with the plain constructor (the input is a list of rows, not
# a dict), drop the first row (presumably the sheet's header row — verify
# against the sheet), and take an explicit copy so that `df` is an independent
# frame rather than a slice: later cells assign columns into it.
df = pd.DataFrame(records_data).iloc[1:, :].copy()
df

Unnamed: 0,0,1,2,3,4,5
1,12/1/2003 23:58:00,6.107,12/1/2003 23:58:00,7.3045827,12/1/2003 23:58:00,10.4942688
2,12/2/2003 23:58:00,6.0749,12/2/2003 23:58:00,7.34090916,12/2/2003 23:58:00,10.50532457
3,12/3/2003 23:58:00,6.1057,12/3/2003 23:58:00,7.39888726,12/3/2003 23:58:00,10.54515447
4,12/4/2003 23:58:00,6.1161,12/4/2003 23:58:00,7.38457914,12/4/2003 23:58:00,10.519692
5,12/5/2003 23:58:00,6.1088,12/5/2003 23:58:00,7.44174016,12/5/2003 23:58:00,10.5804416
...,...,...,...,...,...,...
6237,3/27/2023 23:58:00,30.8991,3/29/2023 23:58:00,33.4,3/29/2023 23:58:00,37.92459
6238,3/28/2023 23:58:00,30.9085,3/30/2023 23:58:00,33.69,3/30/2023 23:58:00,38.27213
6239,3/29/2023 23:58:00,30.8019,3/31/2023 23:58:00,33.39,3/31/2023 23:58:00,37.91087
6240,3/30/2023 23:58:00,30.8944,,,,


In [3]:
# Replace the positional column labels with names, in sheet order: every
# timestamp column is followed by its value column.
df.rename(
    columns={
        0: "Date",
        1: "USD",
        2: "Date.1",
        3: "EUR",
        4: "Date.2",
        5: "GBP",
    },
    inplace=True,
)

In [4]:
# Display the frame again to check the renamed column labels.
df

Unnamed: 0,Date,USD,Date.1,EUR,Date.2,GBP
1,12/1/2003 23:58:00,6.107,12/1/2003 23:58:00,7.3045827,12/1/2003 23:58:00,10.4942688
2,12/2/2003 23:58:00,6.0749,12/2/2003 23:58:00,7.34090916,12/2/2003 23:58:00,10.50532457
3,12/3/2003 23:58:00,6.1057,12/3/2003 23:58:00,7.39888726,12/3/2003 23:58:00,10.54515447
4,12/4/2003 23:58:00,6.1161,12/4/2003 23:58:00,7.38457914,12/4/2003 23:58:00,10.519692
5,12/5/2003 23:58:00,6.1088,12/5/2003 23:58:00,7.44174016,12/5/2003 23:58:00,10.5804416
...,...,...,...,...,...,...
6237,3/27/2023 23:58:00,30.8991,3/29/2023 23:58:00,33.4,3/29/2023 23:58:00,37.92459
6238,3/28/2023 23:58:00,30.9085,3/30/2023 23:58:00,33.69,3/30/2023 23:58:00,38.27213
6239,3/29/2023 23:58:00,30.8019,3/31/2023 23:58:00,33.39,3/31/2023 23:58:00,37.91087
6240,3/30/2023 23:58:00,30.8944,,,,


In [5]:
# Convert the three value columns from text to numbers, one column at a time.
for currency in ("USD", "EUR", "GBP"):
    df[currency] = pd.to_numeric(df[currency])

In [6]:
# Parse each timestamp column using the fixed month/day/year + time layout.
# errors="coerce" yields NaT for any value that does not match the format
# instead of raising.
for date_col in ("Date", "Date.1", "Date.2"):
    df[date_col] = pd.to_datetime(
        df[date_col],
        format='%m/%d/%Y %H:%M:%S',
        errors="coerce",
    )

In [7]:
# Print the column dtypes and non-null counts of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6241 entries, 1 to 6241
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    6241 non-null   datetime64[ns]
 1   USD     6241 non-null   float64       
 2   Date.1  6239 non-null   datetime64[ns]
 3   EUR     6239 non-null   float64       
 4   Date.2  6239 non-null   datetime64[ns]
 5   GBP     6239 non-null   float64       
dtypes: datetime64[ns](3), float64(3)
memory usage: 292.7 KB


#### Save for cleaning

In [8]:
# Write the frame to CSV for the cleaning step. The row index is left out and
# datetime values are formatted as month/day/year plus time.
df.to_csv(
    "../data/USD-EGP Time series Data - raw.csv",
    index=False,
    date_format='%m/%d/%Y %H:%M:%S',
)