Title: Popular Classification Algorithms

Logistic Regression


Task 1: Predict the likelihood of a student passing a test based on study hours.

In [1]:
# Task 1: Predict the likelihood of a student passing a test based on study hours using Logistic Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Example dataset: study hours and pass/fail (1=pass, 0=fail)
data = {
    'study_hours': [1, 2, 2.5, 3, 3.5, 4, 4.5, 5, 6, 7, 8, 9],
    'passed':      [0, 0,   0, 0,   0, 1,   1, 1, 1, 1, 1, 1]
}
df = pd.DataFrame(data)

# Features and target.
# Double brackets keep X a one-column DataFrame (2-D), which is what the estimator expects.
X = df[['study_hours']]
y = df['passed']

# Split into train and test sets.
# test_size=0.25 holds out 3 of the 12 rows; random_state pins the shuffle so reruns match.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train logistic regression model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Predict and evaluate.
# NOTE: with only 3 held-out rows these metrics are illustrative, not a reliable estimate.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=['Fail', 'Pass']))

# Predict probability of passing for a new student who studied 5 hours.
# The model was fitted on a DataFrame with a named column, so the new sample is passed
# as a DataFrame with the same column; a bare nested list makes scikit-learn warn that
# X does not have valid feature names.
new_student = pd.DataFrame({'study_hours': [5]})
# Row 0 is the single sample; column 1 is the probability of class 1 (pass).
prob_pass = clf.predict_proba(new_student)[0][1]
print(f"Predicted probability of passing for 5 study hours: {prob_pass:.2f}")

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

        Fail       1.00      1.00      1.00         1
        Pass       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

Predicted probability of passing for 5 study hours: 0.83




Task 2: Predict customer churn based on service usage data.

In [2]:
# Task 2: Predict customer churn based on service usage data using Logistic Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Example dataset: service usage features and churn (1=churned, 0=not churned)
data = {
    'monthly_minutes': [300, 250, 400, 150, 500, 100, 350, 200, 450, 120],
    'customer_support_calls': [1, 3, 0, 5, 2, 6, 1, 4, 0, 7],
    'contract_length_months': [12, 24, 12, 6, 24, 6, 12, 6, 24, 6],
    'churn': [0, 0, 0, 1, 0, 1, 0, 1, 0, 1]
}
df = pd.DataFrame(data)

# Features and target: every column except the label is a feature.
X = df.drop('churn', axis=1)
y = df['churn']

# Split into train and test sets.
# test_size=0.3 holds out 3 of the 10 rows; random_state pins the shuffle so reruns match.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train logistic regression model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Predict and evaluate.
# NOTE: with only 3 held-out rows these metrics are illustrative, not a reliable estimate.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=['Not Churned', 'Churned']))

# Predict probability of churn for a new customer.
# The model was fitted on a DataFrame, so the new sample is built as a DataFrame with the
# same column names in the same order. A bare nested list would trigger scikit-learn's
# "X does not have valid feature names" warning and would silently depend on positional order.
new_customer = pd.DataFrame({
    'monthly_minutes': [400],
    'customer_support_calls': [2],
    'contract_length_months': [12],
})
# Row 0 is the single sample; column 1 is the probability of class 1 (churned).
prob_churn = clf.predict_proba(new_customer)[0][1]
print(f"Predicted probability of churn for new customer: {prob_churn:.2f}")

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

 Not Churned       1.00      1.00      1.00         2
     Churned       1.00      1.00      1.00         1

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

Predicted probability of churn for new customer: 0.00




Task 3: Classify if a review is positive or negative using NLP.

In [3]:
# Task 3: Classify if a review is positive or negative using NLP and Logistic Regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Example dataset: text reviews and sentiment (1=positive, 0=negative)
data = {
    'review': [
        "I loved this product, it works great!",
        "Terrible experience, will not buy again.",
        "Absolutely fantastic, highly recommend.",
        "Worst purchase ever, very disappointed.",
        "Good quality and fast shipping.",
        "Not worth the money.",
        "Exceeded my expectations!",
        "The item broke after one use.",
        "Very satisfied with my order.",
        "Awful, do not recommend."
    ],
    'sentiment': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
}
df = pd.DataFrame(data)

# Features and target
X = df['review']
y = df['sentiment']

# Split the raw text BEFORE vectorizing.
# Fitting the vectorizer on every review would let the held-out reviews shape the
# vocabulary, which leaks test information into preprocessing.
# test_size=0.3 holds out 3 of the 10 reviews; random_state pins the shuffle.
X_train_text, X_test_text, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Convert text to feature vectors: learn the vocabulary from the training reviews only,
# then reuse that same vocabulary for the test reviews (unseen words are ignored).
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# All reviews encoded with the train-fitted vocabulary; retained so the name X_vec stays defined.
X_vec = vectorizer.transform(X)

# Train logistic regression model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Predict and evaluate.
# NOTE: with only 3 held-out reviews these metrics are illustrative, not a reliable estimate.
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=['Negative', 'Positive']))

# Predict sentiment for a new review.
# The new text must go through the already-fitted vectorizer (transform, not fit_transform)
# so its columns line up with the features the model was trained on.
new_review = ["This is the best purchase I've made!"]
new_review_vec = vectorizer.transform(new_review)
# Row 0 is the single sample; column 1 is the probability of class 1 (positive).
prob_positive = clf.predict_proba(new_review_vec)[0][1]
print(f"Predicted probability of positive sentiment: {prob_positive:.2f}")

Accuracy: 0.6666666666666666
Classification Report:
               precision    recall  f1-score   support

    Negative       1.00      0.50      0.67         2
    Positive       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

Predicted probability of positive sentiment: 0.52
