# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Cluster patients by their monthly "TARGET DRUG" prescription counts and plot
# the total number of prescriptions per month for each cluster.

# Load the dataset
df = pd.read_parquet('Train.parquet')

# Filter the dataset for "Target Drug" incidents only. The explicit copy makes
# this frame independent of `df`, so adding a column below neither triggers
# pandas' SettingWithCopyWarning nor risks writing into a temporary view.
target_drug_df = df[df['Incident'] == 'TARGET DRUG'].copy()

# Convert the dates to month-year format (monthly Period values)
target_drug_df['MonthYear'] = pd.to_datetime(target_drug_df['Date']).dt.to_period('M')

# Count prescriptions per patient per month (one row per patient-month pair)
patient_month_df = (
    target_drug_df.groupby(['Patient-Uid', 'MonthYear'])
    .size()
    .reset_index(name='Prescriptions')
)

# Pivot the table to have patients as rows and months as columns; a month with
# no prescription for a patient becomes 0 instead of NaN.
pivot_table = patient_month_df.pivot(
    index='Patient-Uid', columns='MonthYear', values='Prescriptions'
).fillna(0)

# Apply K-Means clustering (adjust the number of clusters as needed).
# n_init and random_state are pinned so that cluster numbering is reproducible
# between runs and does not depend on the scikit-learn version's defaults.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(pivot_table.values)

# Assign cluster labels to each patient. `labels_` holds one label per row of
# `pivot_table` (i.e. per patient), whereas `patient_month_df` has one row per
# patient-month, so the labels must be looked up by patient id rather than
# assigned positionally.
patient_clusters = pd.Series(kmeans.labels_, index=pivot_table.index, name='Cluster')
patient_month_df['Cluster'] = patient_month_df['Patient-Uid'].map(patient_clusters)

# Visualize the prescription patterns
clusters = sorted(patient_month_df['Cluster'].unique())
all_months = pivot_table.columns  # every month observed in the data, sorted
plt.figure(figsize=(12, 6))

for cluster in clusters:
    cluster_data = patient_month_df[patient_month_df['Cluster'] == cluster]
    # Sum the prescription counts (not the number of patient rows) so the
    # plotted quantity matches the axis label, and give months in which this
    # cluster has no prescriptions an explicit 0.
    prescriptions = (
        cluster_data.groupby('MonthYear')['Prescriptions']
        .sum()
        .reindex(all_months, fill_value=0)
    )
    # Timestamps are plotted instead of Period objects so the x-axis does not
    # depend on pandas' matplotlib unit converters being registered.
    plt.plot(
        prescriptions.index.to_timestamp(),
        prescriptions.values,
        label=f'Cluster {cluster + 1}',
    )

plt.xlabel('Month')
plt.ylabel('Prescriptions')
plt.title('Prescription Patterns for Target Drug')
plt.legend()
plt.xticks(rotation=45)
plt.show()
