In [2]:
import pandas as pd
# Load the data from the CSV file
glass_data = pd.read_csv('C:/Users/xy200/OneDrive/Desktop/glass.csv')
# Display the first few rows of the dataframe to understand its structure
glass_data.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score

model = LogisticRegression()

X_train_Al = X_train_scaled[:, X.columns.get_loc('Al')]
X_test_Al = X_test_scaled[:, X.columns.get_loc('Al')]

X_train_Al = X_train_Al.reshape(-1, 1)
X_test_Al = X_test_Al.reshape(-1, 1)

model.fit(X_train_Al, y_train)

probabilities_Al = model.predict_proba(X_test_Al)[:, 1]

for threshold in [0.3, 0.4, 0.5, 0.6, 0.7]:
    predictions = (probabilities_Al >= threshold).astype(int)
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, average='macro', zero_division=0)
    recall = recall_score(y_test, predictions, average='macro', zero_division=0)
    print(f"Threshold: {threshold}, Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")

Threshold: 0.3, Accuracy: 0.20930232558139536, Precision: 0.04433497536945813, Recall: 0.1168831168831169
Threshold: 0.4, Accuracy: 0.09302325581395349, Precision: 0.03007518796992481, Recall: 0.05194805194805195
Threshold: 0.5, Accuracy: 0.0, Precision: 0.0, Recall: 0.0
Threshold: 0.6, Accuracy: 0.0, Precision: 0.0, Recall: 0.0
Threshold: 0.7, Accuracy: 0.0, Precision: 0.0, Recall: 0.0


Repeat the above try by selecting more columns and fit model

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
# Define thresholds to test
thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]
# Separate features and target
X = glass_data.drop(columns='Type')
y = glass_data['Type']
# Create a logistic regression classifier
lr = LogisticRegression(max_iter=10000)
# store the evaluation metrics in a dictionary for each feature
evaluation_metrics = {}
# Split the data into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Fit the logistic regression model on the scaled training data
lr.fit(X_train_scaled, y_train)
# Get prediction probabilities for the positive class
test_probabilities = lr.predict_proba(X_test_scaled)[:, 1]
# Function to calculate metrics based on the threshold
def calculate_metrics(y_true, probabilities, threshold):
    # Apply threshold to probabilities to get binary predictions
    y_pred = (probabilities >= threshold).astype(int)
    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    return accuracy, precision, recall
# Perform the threshold analysis for each column
for col in X.columns:
    metrics = []
    for threshold in thresholds:
        # Calculate metrics for each threshold
        acc, prec, rec = calculate_metrics(y_test, test_probabilities, threshold)
        metrics.append((threshold, acc, prec, rec))
    evaluation_metrics[col] = metrics
evaluation_metrics

{'RI': [(0.3, 0.09302325581395349, 0.024844720496894408, 0.05194805194805195),
  (0.4, 0.09302325581395349, 0.031746031746031744, 0.05194805194805195),
  (0.5, 0.023255813953488372, 0.015873015873015872, 0.012987012987012988),
  (0.6, 0.0, 0.0, 0.0),
  (0.7, 0.0, 0.0, 0.0)],
 'Na': [(0.3, 0.09302325581395349, 0.024844720496894408, 0.05194805194805195),
  (0.4, 0.09302325581395349, 0.031746031746031744, 0.05194805194805195),
  (0.5, 0.023255813953488372, 0.015873015873015872, 0.012987012987012988),
  (0.6, 0.0, 0.0, 0.0),
  (0.7, 0.0, 0.0, 0.0)],
 'Mg': [(0.3, 0.09302325581395349, 0.024844720496894408, 0.05194805194805195),
  (0.4, 0.09302325581395349, 0.031746031746031744, 0.05194805194805195),
  (0.5, 0.023255813953488372, 0.015873015873015872, 0.012987012987012988),
  (0.6, 0.0, 0.0, 0.0),
  (0.7, 0.0, 0.0, 0.0)],
 'Al': [(0.3, 0.09302325581395349, 0.024844720496894408, 0.05194805194805195),
  (0.4, 0.09302325581395349, 0.031746031746031744, 0.05194805194805195),
  (0.5, 0.0232558139