# In [None]:
import os
from datetime import datetime, timedelta
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns



def download_csv(url, local_path, timeout=30):
    """Download the resource at ``url`` and save it to ``local_path``.

    The download is best-effort: failures are reported on stdout instead of
    raising, so one unavailable file does not abort the whole run.

    Args:
        url: HTTP(S) address of the file to fetch.
        local_path: Destination path; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        True if the file was fetched and written, False otherwise.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are treated like a non-200 reply.
        print(f"Failed to download file from {url}. Error: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to download file from {url}. Status code: {response.status_code}")
        return False

    with open(local_path, 'wb') as file:
        file.write(response.content)
    print(f"File downloaded successfully to {local_path}")
    return True

def send_image_to_telegram(bot_token, chat_id, image, timeout=30):
    """Post an image to a Telegram chat through the Bot API ``sendPhoto`` method.

    Args:
        bot_token: Token of the bot that sends the photo.
        chat_id: Identifier of the target chat.
        image: Raw image bytes (for example the contents of a PNG buffer).
        timeout: Seconds to wait for Telegram before giving up.

    Returns:
        True if Telegram answered with HTTP 200, False otherwise.
    """
    url = f'https://api.telegram.org/bot{bot_token}/sendPhoto'
    files = {'photo': BytesIO(image)}
    try:
        # chat_id travels as a form field so requests encodes it properly
        # instead of it being pasted unescaped into the query string.
        response = requests.post(url, data={'chat_id': chat_id}, files=files, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can include
        # the request URL, which embeds the bot token.
        print(f"Failed to send image. Error: {type(exc).__name__}")
        return False

    if response.status_code == 200:
        print("Image sent successfully!")
        return True

    print(f"Failed to send image. Status code: {response.status_code}")
    return False

# Telegram Bot token and chat ID.
# SECURITY: a bot token stored in source code is a leaked credential. Set
# TELEGRAM_BOT_TOKEN (and optionally TELEGRAM_CHAT_ID) in the environment;
# the literals below remain only as a backward-compatible fallback and the
# token should be revoked and removed from the file.
bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "6546034350:AAEPqhhqSBFbTdcd28H6Ffz3k35NrbZToeo")
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "-4169004006")  # Example group chat ID

# URLs and local paths for the CSV files
donations_facility_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/donations_facility.csv"
donations_facility_local_path = "donations_facility.csv"

donations_state_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/donations_state.csv"
donations_state_local_path = "donations_state.csv"

newdonors_facility_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/newdonors_facility.csv"
newdonors_facility_local_path = "newdonors_facility.csv"

newdonors_state_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/newdonors_state.csv"
newdonors_state_local_path = "newdonors_state.csv"

# Download each CSV file (same order as before: facility/state donations,
# then facility/state new donors)
csv_sources = (
    (donations_facility_url, donations_facility_local_path),
    (donations_state_url, donations_state_local_path),
    (newdonors_facility_url, newdonors_facility_local_path),
    (newdonors_state_url, newdonors_state_local_path),
)
for csv_url, csv_local_path in csv_sources:
    download_csv(csv_url, csv_local_path)

# Read the Parquet file into a DataFrame.
# NOTE(review): nothing in the visible code downloads this file, so it is
# presumably expected to already exist in the working directory.
parquet_file = "blood_donation_retention_2024.parquet"
df = pd.read_parquet(parquet_file)

# Convert visit_date and birth_date to datetime
df['visit_date'] = pd.to_datetime(df['visit_date'])
df['birth_date'] = pd.to_datetime(df['birth_date'])

# Number of recorded visits per donor
visit_counts = df.groupby('donor_id')['visit_date'].count()

# Identify repeat donors (visited more than once)
repeat_donors = visit_counts[visit_counts > 1].index

# Keep only repeat donors. The explicit copy makes the column assignments
# below write to an independent frame instead of a slice of df, which avoids
# pandas' SettingWithCopyWarning.
repeat_donors_df = df[df['donor_id'].isin(repeat_donors)].copy()

# Attach each donor's total visit count to every one of their rows; this
# reuses the per-donor counts computed above instead of grouping again.
repeat_donors_df['return_count'] = repeat_donors_df['donor_id'].map(visit_counts)

# Categorize the donors based on return count. Only donors with more than
# one visit are left, so the '1 time' bin stays empty.
repeat_donors_df['return_category'] = pd.cut(
    repeat_donors_df['return_count'],
    bins=[0, 1, 5, 10, 20, float('inf')],
    labels=['1 time', '2-5 times', '6-10 times', '11-20 times', 'more than 20 times'],
)

# Group by year and return category, and count the number of unique donors.
# observed=False is spelled out so every category keeps a column no matter
# which default the installed pandas version uses for categorical groupers.
grouped_data = (
    repeat_donors_df
    .groupby([repeat_donors_df['visit_date'].dt.year, 'return_category'], observed=False)['donor_id']
    .nunique()
    .unstack()
    .fillna(0)
)

# Create a wide figure (30 x 8 inches)
fig, ax = plt.subplots(figsize=(30, 8))

# Plot one line per return category
grouped_data.plot(kind='line', marker='o', linewidth=2, ax=ax)

# Add title and labels
ax.set_title('Retention Trends of Blood Donors Over Time Across Return Categories', fontsize=16)
ax.set_xlabel('Year', fontsize=14)
ax.set_ylabel('Number of Unique Donors', fontsize=14)
ax.set_xticks(grouped_data.index)  # one tick per year, never a fractional year
ax.tick_params(axis='x', rotation=90)  # Rotate x-axis labels by 90 degrees
ax.grid(True, linestyle='--', alpha=0.7)

# Add legend outside the plotting area
ax.legend(title='Return Category', fontsize=12, bbox_to_anchor=(1.02, 1), loc='upper left')

# Save the plot as an image in memory
buffer = BytesIO()
fig.savefig(buffer, format='png')
buffer.seek(0)

# Send the image to the Telegram group
send_image_to_telegram(bot_token, chat_id, buffer.read())

# Close this figure explicitly
plt.close(fig)


# Read the CSV file into a DataFrame
data = pd.read_csv('donations_facility.csv')

# Blood-type columns to chart
blood_types = ['blood_a', 'blood_b', 'blood_o', 'blood_ab']

# Overall total for each blood type. The values are counts, so they are
# summed as-is; the previous "picogram to nanogram" factor of 1e-3 shrank the
# totals a thousandfold and attached a mass unit that does not apply.
total_donors_by_blood_type = data[blood_types].sum()

# Per-hospital totals for all blood types in a single grouping pass
totals_by_hospital = data.groupby('hospital')[blood_types].sum()

# One panel per blood type on a 2 x 2 grid
fig, axes = plt.subplots(2, 2, figsize=(20, 15))

for ax, blood_type in zip(axes.flat, blood_types):
    total_donors_by_hospital = totals_by_hospital[blood_type]

    # Plot the bar chart
    ax.bar(total_donors_by_hospital.index, total_donors_by_hospital.values, color='red')
    ax.set_title(f'Total Number of Blood Donors by Blood Type Across Hospitals ( {blood_type.upper()} )')
    ax.set_xlabel('Hospital')
    ax.set_ylabel('Total Donors')
    ax.tick_params(axis='x', rotation=90)

# tight_layout computes the panel spacing, so no manual subplots_adjust is needed
fig.tight_layout()

# Save the plot as an image in memory
buffer = BytesIO()
fig.savefig(buffer, format='png')
buffer.seek(0)

# Send the image to the Telegram group
send_image_to_telegram(bot_token, chat_id, buffer.read())

# Close this figure explicitly
plt.close(fig)

# Read the data from the CSV file
data = pd.read_csv("donations_state.csv")

# Exclude rows where the state is "Malaysia". The copy makes the column
# assignments below write to an independent frame instead of a slice, which
# avoids pandas' SettingWithCopyWarning.
data = data[data['state'] != 'Malaysia'].copy()

# Convert the 'date' column to datetime format and derive the year
data['date'] = pd.to_datetime(data['date'])
data['year'] = data['date'].dt.year

# Group the data by year and state, summing the daily donations
grouped_data = data.groupby(['year', 'state'], as_index=False)['daily'].sum()

# Pivot to years as rows and states as columns. A year/state pair with no
# rows would become NaN and turn the table into floats, which the integer
# annotation format below ("d") cannot render, so gaps are filled with 0 and
# the table is kept integral.
pivot_data = (
    grouped_data
    .pivot(index='year', columns='state', values='daily')
    .fillna(0)
    .astype(int)
)

# Set a larger figure size
fig = plt.figure(figsize=(12, 12))

# Create a heatmap; annot=True writes each cell's value, fmt="d" formats it as an integer
sns.heatmap(pivot_data, annot=True, fmt="d", cmap="YlGnBu", cbar=True)
plt.xlabel('State')
plt.ylabel('Year')
plt.title('Annual Blood Donation Heatmap: Contributions by State')

# Save the plot as an image in memory
buffer = BytesIO()
fig.savefig(buffer, format='png')
buffer.seek(0)

# Send the image to the Telegram group
send_image_to_telegram(bot_token, chat_id, buffer.read())

# Close this figure explicitly
plt.close(fig)

# Load the state-level donations and derive the calendar year of every record
data = pd.read_csv('donations_state.csv')
data['date'] = pd.to_datetime(data['date'])
data['year'] = data['date'].dt.year

# Keep only the rows whose state is "Malaysia"
is_malaysia = data['state'] == 'Malaysia'
malaysia_data = data[is_malaysia]

# Yearly totals of the 'daily' column for those rows
malaysia_donations = malaysia_data.groupby('year', as_index=False)['daily'].sum()

# Draw the yearly totals as a single line with point markers
plt.figure(figsize=(12, 6))
sns.lineplot(
    data=malaysia_donations,
    x='year',
    y='daily',
    color='skyblue',
    marker='o',
    markersize=8,
    linewidth=2.5,
)

# Title and axis labels
plt.title('Blood Donation Trends in Malaysia: A Comprehensive Overview', fontsize=16)
plt.xlabel('Year', fontsize=14)
plt.ylabel('Total Daily Donations', fontsize=14)

# One x tick per year between the first and last year present
first_year = malaysia_donations['year'].min()
last_year = malaysia_donations['year'].max()
plt.xticks(np.arange(first_year, last_year + 1, 1), fontsize=12)
plt.yticks(fontsize=12)

# Dashed grid on a light background
plt.grid(True, linestyle='--', alpha=0.7)
plt.gca().set_facecolor('#f9f9f9')

# Legend entry for the line
plt.legend(['Total Daily Donations'], loc='upper left', fontsize=12)

# Drop the top and right spines
sns.despine()

# Write each year's total just above its marker
for year, total in zip(malaysia_donations['year'], malaysia_donations['daily']):
    plt.text(year, total, f"{total}", ha='center', va='bottom', fontsize=10, color='black')

plt.tight_layout()

# Render the figure to an in-memory PNG
buffer = BytesIO()
plt.savefig(buffer, format='png')
buffer.seek(0)

# Deliver the PNG to the Telegram group
send_image_to_telegram(bot_token, chat_id, buffer.read())

# Release the figure
plt.close()

# Step 1: Read the data from the CSV file
data = pd.read_csv('newdonors_state.csv')

# Step 2: Filter the data for the state 'Malaysia'. The copy makes the column
# assignments below write to an independent frame instead of a slice, which
# avoids pandas' SettingWithCopyWarning.
malaysia_data = data[data['state'] == 'Malaysia'].copy()

# Step 3: Calculate the percentage of each age group for each year
age_columns = ['17-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', 'other']

# Convert the columns to numeric (unparseable cells become NaN)
malaysia_data[age_columns] = malaysia_data[age_columns].apply(pd.to_numeric, errors='coerce')
malaysia_data['total'] = malaysia_data[age_columns].sum(axis=1)

# Derive the year by parsing the date. The previous split(',')[0] returned a
# comma-free date string unchanged, so the "Year" axis carried one point per
# record rather than one per year.
# NOTE(review): assumes 'date' is parseable by pd.to_datetime, like the
# 'date' column of donations_state.csv - confirm.
malaysia_data['year'] = pd.to_datetime(malaysia_data['date']).dt.year

# Sum the counts per year, then express each age group as a share of that
# year's total. Years with a zero total give NaN instead of dividing by zero.
yearly_counts = malaysia_data.groupby('year')[age_columns].sum()
yearly_totals = yearly_counts.sum(axis=1)
percentages = yearly_counts.div(yearly_totals.where(yearly_totals > 0), axis=0) * 100

# Reshape to long form for the scatter plot: one row per (year, age group)
scatter_data = percentages.reset_index().melt(
    id_vars='year', var_name='age_group', value_name='percentage'
)

# Step 4: Create the scatter plot
fig = px.scatter(scatter_data, x='year', y='percentage', color='age_group',
                 title='Percentage of Blood Donors by Age Group in Malaysia',
                 labels={'year': 'Year', 'percentage': 'Percentage', 'age_group': 'Age Group'},
                 # Two decimals; the old ':.2f%' was not a valid format spec,
                 # and the values are already scaled to 0-100.
                 hover_data={'percentage': ':.2f'},
                 width=1000, height=600)

# Save the plot as an image in memory
image_bytes = fig.to_image(format="png")

# Send the image to the Telegram group
send_image_to_telegram(bot_token, chat_id, image_bytes)

# Read the CSV file into a DataFrame
data = pd.read_csv('donations_state.csv')

# Convert the 'date' column to datetime
data['date'] = pd.to_datetime(data['date'])

# Seven-day window ending yesterday. The bounds are normalised to midnight:
# with a time-of-day attached, the first day of the window (whose rows sit at
# midnight once parsed) failed the ">= start" test and only six days were kept.
end_date = pd.Timestamp.now().normalize() - pd.Timedelta(days=1)
start_date = end_date - pd.Timedelta(days=6)

# Keep the national "Malaysia" rows inside the window. Summing every row
# would add the state rows on top of the national aggregate.
in_window = data['date'].between(start_date, end_date)
filtered_data = data[in_window & (data['state'] == 'Malaysia')]

# Daily totals of the 'daily' column only (the text 'state' column is left out)
weekly_data = filtered_data.resample('D', on='date')['daily'].sum().reset_index()

# Plotting
fig = plt.figure(figsize=(12, 6))

# Customizing the line plot
sns.lineplot(data=weekly_data, x='date', y='daily', marker='o', markersize=8, linewidth=2.5)

# Adding title and labels
plt.title('Daily Donation Trend in Malaysia Over the Past 7 Days', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Total Daily Donations', fontsize=14)

# Rotate x-axis labels for better readability
plt.xticks(rotation=45)

# Adding grid lines
plt.grid(True, linestyle='--', alpha=0.7)

# Adding a background color
plt.gca().set_facecolor('#f9f9f9')

# Removing spines
sns.despine()

plt.tight_layout()

# Save the plot as an image in memory
buffer = BytesIO()
fig.savefig(buffer, format='png')
buffer.seek(0)

# Send the image to the Telegram group
send_image_to_telegram(bot_token, chat_id, buffer.read())

# Close this figure explicitly
plt.close(fig)
