In [1]:
import pandas as pd
from datetime import datetime
import numpy as np

# --- Load ---------------------------------------------------------------
# Read the three source CSVs in order, parsing each 'date' column as datetime
# so the frames can be joined and filtered on it.
deaths_df, vax_df, cases_df = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in (
        'deaths_malaysia.csv',
        'vax_malaysia.csv',
        'cases_malaysia.csv',
    )
)

# --- Merge --------------------------------------------------------------
# Inner-join on 'date': only dates present in all three files are kept.
merged_df = (
    deaths_df
    .merge(vax_df, on='date', how='inner')
    .merge(cases_df, on='date', how='inner')
)

# --- Filter -------------------------------------------------------------
# Analysis window: 2021-07-15 through 2022-01-15, both endpoints included.
# The range as originally requested listed the endpoints in reverse order
# (15/7/2022 to 15/1/2021); the chronological reading is assumed here.
start_date = datetime(2021, 7, 15)
end_date = datetime(2022, 1, 15)
in_window = merged_df['date'].between(start_date, end_date)
filtered_df = merged_df[in_window]

# --- Shuffle ------------------------------------------------------------
# frac=1 draws every row exactly once (a full shuffle); the fixed seed makes
# the row order repeatable, and the index is renumbered from 0 afterwards.
randomized_df = (
    filtered_df
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# --- Report & save ------------------------------------------------------
# The window covers 185 calendar days, so 185 rows are expected when every
# date in it is present in all three files.
print("Randomized DataFrame shape:", randomized_df.shape)

# Write the complete shuffled frame to disk rather than printing every row.
randomized_df.to_csv('randomized_malaysia_data.csv', index=False)
print("Full randomized data saved to 'randomized_malaysia_data.csv'")

# Descriptive statistics followed by the full list of column names.
print("\nData Summary:")
print(randomized_df.describe())
print("\nColumn Names:")
print(randomized_df.columns.to_list())

Randomized DataFrame shape: (185, 89)
Full randomized data saved to 'randomized_malaysia_data.csv'

Data Summary:
                      date  deaths_new  deaths_bid  deaths_new_dod  \
count                  185  185.000000  185.000000      185.000000   
mean   2021-10-15 00:00:00  136.637838   30.270270      130.140541   
min    2021-07-15 00:00:00   11.000000    0.000000        9.000000   
25%    2021-08-30 00:00:00   41.000000    8.000000       40.000000   
50%    2021-10-15 00:00:00   88.000000   15.000000       74.000000   
75%    2021-11-30 00:00:00  219.000000   35.000000      233.000000   
max    2022-01-15 00:00:00  592.000000  176.000000      408.000000   
std                    NaN  120.285049   33.794495      107.944489   

       deaths_bid_dod  deaths_unvax  deaths_pvax  deaths_fvax  deaths_boost  \
count      185.000000    185.000000   185.000000   185.000000    185.000000   
mean        28.535135     75.097297    29.297297    25.551351      0.194595   
min          0.000