In [5]:
import pandas as pd

df = pd.read_csv('patient_transaction.csv')

df2 = pd.read_csv('online_retail.csv',encoding='ISO-8859-1')
df2 = df2[df2['Country'] == 'United Kingdom']


# Step 1: Convert 'Date' column to dateti
df2['InvoiceDate'] = pd.to_datetime(df2['InvoiceDate'])

# Step 2: Find the first transaction for each Patient ID
first_transaction = df2.groupby('CustomerID')['InvoiceDate'].min().reset_index()

# Step 3: Extract the year of the first transaction
first_transaction['Year'] = first_transaction['InvoiceDate'].dt.year

# Step 4: Count the number of patients acquired per year
historical_acquisition = first_transaction.groupby('Year')['CustomerID'].count().reset_index()
historical_acquisition.rename(columns={'CustomerID': 'Number of Patients Acquired'}, inplace=True)

historical_acquisition


Unnamed: 0,Year,Number of Patients Acquired
0,2010,871
1,2011,3079


In [None]:
import pandas as pd

df = pd.read_csv('patient_transaction.csv')

df2 = pd.read_csv('online_retail.csv')
df2 = df2[df2['Country'] == 'Germany']


# Step 1: Convert 'Date' column to dateti
df2['InvoiceDate'] = pd.to_datetime(df2['InvoiceDate'])

# Step 2: Find the first transaction for each Patient ID
first_transaction = df.groupby('Patient ID')['Date'].min().reset_index()

# Step 3: Extract the year of the first transaction
first_transaction['Year'] = first_transaction['Date'].dt.year

# Step 4: Count the number of patients acquired per year
historical_acquisition = first_transaction.groupby('Year')['Patient ID'].count().reset_index()
historical_acquisition.rename(columns={'Patient ID': 'Number of Patients Acquired'}, inplace=True)

historical_acquisition


Unnamed: 0,Year,Number of Patients Acquired
0,2018,613
1,2019,248
2,2020,85
3,2021,37
4,2022,18
5,2023,8


In [6]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Prepare data
years = historical_acquisition['Year'].values.reshape(-1, 1)  # Feature: years
acquisitions = historical_acquisition['Number of Patients Acquired'].values  # Target

# Fit the model
linear_model = LinearRegression()
linear_model.fit(years, acquisitions)

# Forecast for the next 5 years
future_years = np.arange(years[-1] + 1, years[-1] + 6).reshape(-1, 1)
linear_predictions = linear_model.predict(future_years)

print("Linear Regression Predictions:", linear_predictions)


Linear Regression Predictions: [ 5287.  7495.  9703. 11911. 14119.]


In [7]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit the model
holt_model = ExponentialSmoothing(
    historical_acquisition['Number of Patients Acquired'],
    trend='add', seasonal=None
).fit()

# Forecast for the next 5 years
holt_predictions = holt_model.forecast(5)
print("Holt-Winters Predictions:", holt_predictions)


Holt-Winters Predictions: 2     5287.000000
3     7495.000000
4     9702.999999
5    11910.999999
6    14118.999999
dtype: float64


In [11]:
from sklearn.ensemble import RandomForestRegressor

# Prepare data
years = historical_acquisition['Year'].values.reshape(-1, 1)
acquisitions = historical_acquisition['Number of Patients Acquired'].values

# Fit Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(years, acquisitions)

# Forecast for the next 5 years
future_years = np.arange(years[-1] + 1, years[-1] + 6).reshape(-1, 1)
rf_predictions = rf_model.predict(future_years)

print("Random Forest Predictions:", rf_predictions)


Random Forest Predictions: [2504.92 2504.92 2504.92 2504.92 2504.92]


In [12]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Prepare data
scaler = MinMaxScaler()
scaled_acquisitions = scaler.fit_transform(historical_acquisition['Number of Patients Acquired'].values.reshape(-1, 1))

X = np.array([scaled_acquisitions[i:i+1] for i in range(len(scaled_acquisitions) - 1)])
y = scaled_acquisitions[1:]

# Build LSTM model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(1, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=200, verbose=0)

# Forecast for the next 5 years
future_acquisitions = []
current_input = scaled_acquisitions[-1].reshape((1, 1, 1))
for _ in range(5):
    prediction = model.predict(current_input, verbose=0)
    future_acquisitions.append(prediction[0][0])
    current_input = np.array(prediction).reshape((1, 1, 1))

# Inverse scale the predictions
future_acquisitions = scaler.inverse_transform(np.array(future_acquisitions).reshape(-1, 1))
print("LSTM Predictions:", future_acquisitions)


  super().__init__(**kwargs)


LSTM Predictions: [[1342.9928]
 [1305.3225]
 [1304.4277]
 [1304.4064]
 [1304.4059]]
