In [14]:
import random
from faker import Faker
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Initialize the Faker object
fake = Faker()

# Create an empty list to store the data
data = []

# Generate data for 100 employees
for i in range(100):
    # Generate a random name
    name = fake.name()
    # Generate a random age between 18 and 60
    age = random.randint(18, 60)
    # Generate a random gender
    gender = random.choice(['male', 'female'])
    # Generate a random number of previous donations between 0 and 5
    previous_donations = random.randint(0, 5)
    # Generate random attendance of blood donation camp
    attended_donation = random.choice([0,1])
    data.append([name, age, gender, previous_donations,attended_donation])

# Convert the data to a pandas DataFrame
df = pd.DataFrame(data, columns=["name", "age", "gender", "previous_donations","attended_donation"])


In [15]:
# Encode the 'gender' column
le = LabelEncoder()
df['gender'] = le.fit_transform(df['gender'])

# One-hot encode the 'gender' column
df = pd.get_dummies(df, columns=["gender"])

# Saving the generated data to csv file
df.to_csv("blood_donation_data.csv",index=False)

# Split the data into training and testing sets
X = df.drop(columns = ['attended_donation','name'])
y = df['attended_donation']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Evaluate
y_pred = lin_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

# Create new data for prediction
new_data = [[40, 0, 1, 3]] # This is an example, replace with actual data 40 is the age of the employee, 0 is the gender of the employee, where 0 represents male and 1 represents female, 1 is the number of previous donations of the employee ,3 is the number of times the employee has attended the blood donation camp in the past.

# Make predictions
predicted_prob = lin_reg.predict(new_data)
print("Predicted probability of attendance:", predicted_prob)

# You can use a threshold value to decide if the employee is likely to attend the blood donation camp or not
# For example, if the predicted probability is greater than 0.5, then the employee is likely to attend
threshold = 0.5
if predicted_prob > threshold:
    print("The employee is likely to attend the blood donation camp")
else:
    print("The employee is not likely to attend the blood donation camp")


Mean Squared Error: 0.24619920474606483
Mean Absolute Error: 0.49001111417784793
R-squared: -0.025830019775269886
Predicted probability of attendance: [0.4522327]
The employee is not likely to attend the blood donation camp


In [None]:
#note column gender_0 = male and gender_1 = female