In [212]:
#Scenario: University of Central Florida is considering applications for admissions. 
#They already have data on previous applicants with their scores on two exams.
#In the admissions column, 1 means they've been admitted and 0 means they haven't been admitted.
#For Exam 1, the minimum requirement to pass is 75%, and for Exam 2, the minimum requirement to pass is 80%.
#Build a logistic regression model to predict the outcome for future applicants.

In [213]:
#import necessary modules to create the dataset 
import numpy as np
import pandas as pd

In [214]:
# Fix NumPy's global random state so the synthetic data below is identical on every run
np.random.seed(seed=42)

In [215]:
# Number of synthetic applicants (rows) to generate; every generated array below uses this size
num_samples = 100

In [216]:
# Features: draw each applicant's two exam scores from normal distributions.
# Exam 1 is centred on 70 with a standard deviation of 10,
# Exam 2 is centred on 80 with a standard deviation of 8.
# The draw order (Exam 1 first, then Exam 2) matters for reproducibility under the fixed seed.
exam1_scores = np.random.normal(loc=70, scale=10, size=num_samples)
exam2_scores = np.random.normal(loc=80, scale=8, size=num_samples)

In [217]:
# Target: draw a random 0/1 admission label per applicant (70% chance of 0, 30% chance of 1).
# These are placeholder labels; a later cell overwrites the Admission column
# with values derived from the exam scores.
admission_decision = np.random.choice(
    a=[0, 1],
    size=num_samples,
    p=[0.7, 0.3],
)

In [218]:
# Assemble the two feature arrays and the label array into one table, one row per applicant
df = pd.DataFrame(
    {
        'Exam 1': exam1_scores,
        'Exam 2': exam2_scores,
        'Admission': admission_decision,
    }
)

In [219]:
#Write the dataframe to a CSV file in the current working directory
#index=False keeps the row index out of the file so no extra column appears when it is read back
df.to_csv(path_or_buf='admission_dataset.csv', index=False)

In [220]:
#Read the CSV back in and preview the first rows.
#The path is the same relative filename that was passed to df.to_csv above, so the
#notebook reloads exactly the file it just wrote and runs from any working directory
#(a hard-coded absolute home-directory path only works on one machine and can load a stale file).
df = pd.read_csv('admission_dataset.csv')
df.head()

Unnamed: 0,Exam 1,Exam 2,Admission
0,74.967142,68.677034,1
1,68.617357,76.634837,1
2,76.476885,77.258284,0
3,85.230299,73.581782,1
4,67.658466,78.709714,0


In [221]:
#The Admission labels generated above are random, so overwrite them with the rule from the scenario:
#an applicant is admitted (1) only when Exam 1 is at least 75 AND Exam 2 is at least 80, otherwise 0.
#The comparisons are inclusive (>=) against the stated minimums because the scores are continuous
#floats: a "> 74" / "> 79" check would also admit scores such as 74.5 or 79.3, which are below the minimums.
if_condition = (df['Exam 1'] >= 75) & (df['Exam 2'] >= 80)
#Convert the boolean mask straight to 0/1 in one assignment instead of two separate .loc writes
df['Admission'] = if_condition.astype(int)

In [222]:
#Display the dataset after the Admission column has been rewritten from the exam-score rule
df

Unnamed: 0,Exam 1,Exam 2,Admission
0,74.967142,68.677034,0
1,68.617357,76.634837,0
2,76.476885,77.258284,0
3,85.230299,73.581782,0
4,67.658466,78.709714,0
...,...,...,...
95,55.364851,83.082539,0
96,72.961203,72.929141,0
97,72.610553,81.229801,0
98,70.051135,80.465670,0


In [223]:
#Count the admitted applicants: summing the boolean mask counts the rows where Admission equals 1
int((df['Admission'] == 1).sum())

16

In [224]:
#Import necessary modules 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [225]:
# Features (X): the two exam-score columns. Target (y): the 0/1 Admission label.
X = df.loc[:, ['Exam 1', 'Exam 2']]
y = df.loc[:, 'Admission']

In [226]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state is fixed so the same rows land in each split on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [227]:
# Instantiate scikit-learn's LogisticRegression classifier; no arguments are passed, so library defaults apply
model = LogisticRegression()

In [228]:
# Fit the classifier on the training split only; the test split stays unseen until evaluation
model.fit(X_train, y_train)

LogisticRegression()

In [229]:
# Predict a 0/1 admission label for every row of the held-out test features
y_pred = model.predict(X_test)

In [230]:
# Score the predictions against the true test labels: accuracy is the fraction predicted correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.95


In [231]:
#Hand-entered exam scores for 20 future applicants.
#The column names match the training features so the fitted model can consume this frame directly.
future_exam1 = [80, 90, 75, 60, 85, 90, 75, 70, 83, 75, 59, 79, 82, 90, 77, 78, 81, 85, 69, 66]
future_exam2 = [85, 95, 70, 72, 65, 77, 90, 80, 68, 91, 83, 77, 75, 69, 65, 90, 83, 85, 91, 78]
future_data = pd.DataFrame({'Exam 1': future_exam1, 'Exam 2': future_exam2})

In [232]:
# Use the trained model to predict a 0/1 admission label for each future applicant
future_predictions = model.predict(future_data)

In [233]:
# Print the predicted admission outcomes, one per row of future_data (1 = admitted, 0 = not admitted)
print(future_predictions)

[1 1 0 0 0 1 1 0 0 1 0 0 0 1 0 1 1 1 0 0]


In [234]:
#We imported the necessary modules and generated a random dataset that stays the same every time we run it (the random seed is fixed).
#Then, we made an if statement to change the values in the column Admissions based off of certain scores in columns Exam 1 and Exam 2.
#After that, we split our dataset into training and testing datasets (80% / 20%) and chose the Logistic Regression model.
#The Logistic Regression model works well for classification problems such as this. 
#Next, we fit the model on the training dataset and use it to make predictions on the testing dataset.
#We can see that the accuracy on the testing dataset is 95% (0.95).
#We can then apply our trained model to a hand-entered set of exam scores, treating them as the future applicants' data, and make predictions.
#Once you print the results of the future applicant's data, you can see that it gives the predicted outcome. 