In [25]:
import yfinance as yf
import pandas as pd

In [63]:
ticker_symbol = 'BTC-USD'
df = yf.download(ticker_symbol, start='2024-01-01', end='2024-10-07', interval='60m')

[*********************100%%**********************]  1 of 1 completed


In [64]:
print(df)

                             Open          High           Low         Close  \
Datetime                                                                      
2024-01-01 00:00:00  42280.234375  42517.601562  42277.726562  42477.253906   
2024-01-01 01:00:00  42471.402344  42718.710938  42433.785156  42622.898438   
2024-01-01 02:00:00  42611.335938  42611.335938  42506.359375  42576.601562   
2024-01-01 03:00:00  42575.968750  42581.007812  42270.757812  42320.730469   
2024-01-01 04:00:00  42325.042969  42387.164062  42227.984375  42387.164062   
...                           ...           ...           ...           ...   
2024-08-11 04:00:00  61125.503906  61133.714844  60921.582031  60935.925781   
2024-08-11 05:00:00  60932.417969  61152.468750  60913.945312  61152.468750   
2024-08-11 06:00:00  61126.328125  61174.535156  61032.218750  61132.808594   
2024-08-11 07:00:00  61130.597656  61265.992188  61117.199219  61263.808594   
2024-08-11 08:00:00  61266.664062  61329.441406  611

In [65]:
df.index.name = df.index.name.lower()
df.columns = df.columns.str.lower()

In [66]:
df['year'] = df.index.year
df['month'] = df.index.month
df['hour'] = df.index.hour
df['day_name'] = df.index.day_name()

In [67]:
df['max'] = df[['open', 'high', 'low', 'close']].max(axis=1)
df['min'] = df[['open', 'high', 'low', 'close']].min(axis=1)
df['difference'] = df['max'] - df['min']

In [68]:
df.shape

(5361, 13)

In [69]:
df = df.groupby(['day_name', 'hour']).agg({'volume': 'sum'}).reset_index()

In [70]:
day_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

# Convert the 'day_name' column to a categorical type with the specified order
df['day_name'] = pd.Categorical(
    df['day_name'], 
    categories=day_order, 
    ordered=True
)

df = df.sort_values(by='day_name', ascending=True)


In [71]:
df.shape

(168, 3)

In [72]:
df

Unnamed: 0,day_name,hour,volume
83,Sunday,11,3358931456
72,Sunday,0,5707345920
73,Sunday,1,5469582336
74,Sunday,2,2667146240
75,Sunday,3,5801750528
...,...,...,...
52,Saturday,4,1218656256
51,Saturday,3,721285120
50,Saturday,2,1574135808
60,Saturday,12,339269632


In [73]:
# Encoding categorical data
day_mapping = {
    'Sunday': 1,
    'Monday': 2,
    'Tuesday': 3,
    'Wednesday': 7,
    'Thursday': 5,
    'Friday': 6,
    'Saturday': 4
}

df['day_map'] = df['day_name'].map(day_mapping)

# Calculate correlation
correlation_matrix = df[['day_map', 'hour']].corr()

In [74]:
correlation_matrix

Unnamed: 0,day_map,hour
day_map,1.0,-1.429533e-16
hour,-1.429533e-16,1.0


In [75]:
tuesday_data = df[df['day_map'].isin([3])]

In [76]:
tuesday_data

Unnamed: 0,day_name,hour,volume,day_map
120,Tuesday,0,11653582848,3
130,Tuesday,10,9691023360,3
121,Tuesday,1,15517669376,3
122,Tuesday,2,20662288384,3
123,Tuesday,3,15371808768,3
124,Tuesday,4,15037345792,3
126,Tuesday,6,9205358592,3
127,Tuesday,7,10157783040,3
128,Tuesday,8,10281949184,3
129,Tuesday,9,13490001920,3


In [58]:
correlations = {}
for col in ['hour']:
  correlations[col] = df['volume'].corr(df[col])

In [59]:
df = pd.DataFrame(correlations, index=['volume'])

In [77]:
tuesday_hours_volume_sum = df.groupby(['hour']).agg({'volume': 'sum'}).reset_index()

In [78]:
tuesday_hours_volume_sum

Unnamed: 0,hour,volume
0,0,58383627264
1,1,75529482240
2,2,66901263360
3,3,62657184768
4,4,52922769408
5,5,57502845952
6,6,60356382720
7,7,67125081088
8,8,64729735168
9,9,63899938816


In [84]:
tuesday_hours_volume_sum.max().volume

112193053184

In [86]:
tuesday_hours_volume_sum['volume'].idxmax()

14

In [88]:
tuesday_hours_volume_sum.sort_values(by='volume', ascending=False)

Unnamed: 0,hour,volume
14,14,112193053184
15,15,107722426880
16,16,106544325120
20,20,100846266368
13,13,95871804928
18,18,94748262400
17,17,92032914432
19,19,89256003584
12,12,88097546240
1,1,75529482240


In [81]:
tuesday_hours_volume_sum.min()

hour                0
volume    49653697024
dtype: int64

In [82]:
tuesday_hours_volume_sum.median()

hour      1.150000e+01
volume    6.701317e+10
dtype: float64

In [83]:
tuesday_hours_volume_sum.mean()

hour      1.150000e+01
volume    7.532989e+10
dtype: float64

In [90]:
monday_data = df[df['day_name'].isin(['Monday'])]

In [91]:
monday_hours_volume_sum = monday_data.groupby(['hour']).agg({'volume': 'sum'}).reset_index()

In [92]:
monday_hours_volume_sum

Unnamed: 0,hour,volume
15,15,47278159872
14,14,45275032576
16,16,40593915904
19,19,35910535168
13,13,34949866496
17,17,31248926720
18,18,28165194752
8,8,27400722432
6,6,26899386368
20,20,26504501248


In [97]:
import matplotlib.pyplot as plt

In [None]:
monday_hours = [for hour in d]

In [None]:
colors = [color_by_hour[hour] for hour in hours]


In [None]:
plt.bar(hours, volume, color=colors)
