In [4]:
import pandas as pd

# Read the raw quote snapshots from disk and preview the first five rows
df = pd.read_csv(filepath_or_buffer="btc_quotes.csv")
df.head(n=5)

Unnamed: 0,timestamp,price,volume_24h,percent_change_1h,percent_change_24h,percent_change_7d
0,2025-06-14T03:07:15,105375.288025,55426900000.0,-0.11418,1.461239,0.608094
1,2025-06-14T03:27:10,105369.325899,55725330000.0,-0.145567,1.039515,0.471448
2,2025-06-14T15:50:20,104843.15503,40012820000.0,-0.096693,-0.749482,-0.722681
3,2025-06-14T15:50:29,104843.15503,40012820000.0,-0.096693,-0.749482,-0.722681
4,2025-06-14T15:50:31,104843.15503,40012820000.0,-0.096693,-0.749482,-0.722681


In [6]:
# Inspect the dtype pandas inferred for every column.
# In the recorded output, `timestamp` loads as `object` rather than a datetime dtype.
df.dtypes

timestamp              object
price                 float64
volume_24h            float64
percent_change_1h     float64
percent_change_24h    float64
percent_change_7d     float64
dtype: object

In [7]:
# Report the frame's dimensions and how many rows are exact repeats of an earlier row
frame_shape = df.shape
repeat_count = df.duplicated().sum()
print("Shape:", frame_shape)
print("Duplicate rows:", repeat_count)

Shape: (74, 6)
Duplicate rows: 1


In [13]:
# Normalise the raw timestamp strings in one pass:
#   1. trim leading/trailing whitespace,
#   2. remove every literal "+00:00" so all values share the same text format.
df['timestamp'] = (
    df['timestamp']
    .str.strip()
    .str.replace(r'\+00:00', '', regex=True)
)
df

Unnamed: 0,timestamp,price,volume_24h,percent_change_1h,percent_change_24h,percent_change_7d
0,2025-06-14T03:07:15,105375.288025,5.542690e+10,-0.114180,1.461239,0.608094
1,2025-06-14T03:27:10,105369.325899,5.572533e+10,-0.145567,1.039515,0.471448
2,2025-06-14T15:50:20,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
3,2025-06-14T15:50:29,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
4,2025-06-14T15:50:31,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
...,...,...,...,...,...,...
69,2025-06-19T19:00:05,104368.091732,3.978001e+10,-0.174803,0.659596,-2.978086
70,2025-06-19T20:00:11,104317.658074,3.731709e+10,-0.048323,0.368025,-2.338924
71,2025-06-19T21:00:09,104285.050861,3.756763e+10,-0.031258,-0.544500,-1.623106
72,2025-06-19T22:00:08,104322.033400,3.691929e+10,0.035463,-0.645039,-1.530641


In [17]:
# Parse the timestamp strings into datetimes; values that cannot be parsed become NaT
parsed_ts = pd.to_datetime(df['timestamp'], errors='coerce')
df['timestamp'] = parsed_ts

# Count the missing (NaT) entries left in the column after parsing
unparsed_count = parsed_ts.isna().sum()
print("Null timestamps after conversion:", unparsed_count)

Null timestamps after conversion: 0


In [20]:
# Keep only the first occurrence of each fully identical row
is_repeat = df.duplicated()
df = df[~is_repeat]
print("Shape after dropping duplicates:", df.shape)

Shape after dropping duplicates: (73, 6)


In [22]:
# Order the rows chronologically, then renumber the index from 0
# (the old index labels are discarded rather than kept as a column)
df = df.sort_values(by='timestamp').reset_index(drop=True)
df

Unnamed: 0,timestamp,price,volume_24h,percent_change_1h,percent_change_24h,percent_change_7d
0,2025-06-14 03:07:15,105375.288025,5.542690e+10,-0.114180,1.461239,0.608094
1,2025-06-14 03:27:10,105369.325899,5.572533e+10,-0.145567,1.039515,0.471448
2,2025-06-14 15:50:20,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
3,2025-06-14 15:50:29,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
4,2025-06-14 15:50:31,104843.155030,4.001282e+10,-0.096693,-0.749482,-0.722681
...,...,...,...,...,...,...
68,2025-06-19 19:00:05,104368.091732,3.978001e+10,-0.174803,0.659596,-2.978086
69,2025-06-19 20:00:11,104317.658074,3.731709e+10,-0.048323,0.368025,-2.338924
70,2025-06-19 21:00:09,104285.050861,3.756763e+10,-0.031258,-0.544500,-1.623106
71,2025-06-19 22:00:08,104322.033400,3.691929e+10,0.035463,-0.645039,-1.530641


In [24]:
# Write the cleaned frame to disk without the index column, then confirm
df.to_csv(path_or_buf="btc_quotes_cleaned.csv", index=False)
print("✅ Cleaned dataset saved as 'btc_quotes_cleaned.csv'")

✅ Cleaned dataset saved as 'btc_quotes_cleaned.csv'
