## MODEL 

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, classification_report

class FruitClassifier:

  def __init__(self, filepath):
    self.data = self.load_data(filepath)

  def load_data(self, filepath):
    return pd.read_csv(filepath, index_col=0)

  def preprocess_data(self):
    X = self.data[['mass', 'width', 'height', 'color_score']]  # Features
    y = self.data['fruit_label']  # Target
    return X, y

  def split_data(self, test_size=0.2, random_state=42):
    X, y = self.preprocess_data()
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

  def train_model(self, X_train, y_train):
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model

  def evaluate_model(self, model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    return accuracy, f1, recall, precision, conf_matrix, class_report

  def run(self):
    X_train, X_test, y_train, y_test = self.split_data()
    model = self.train_model(X_train, y_train)
    accuracy, f1, recall, precision, conf_matrix, class_report = self.evaluate_model(model, X_test, y_test)

    print("First few rows of the dataset:")
    print(self.data.head())

    print(f"Accuracy: {accuracy}")
    print(f"F1 Score: {f1}")
    print(f"Recall: {recall}")
    print(f"Precision: {precision}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)

if __name__ == "__main__":
  filepath = "C:\\Users\\LENOVO\\Downloads\\fruits.csv"
  classifier = FruitClassifier(filepath)
  classifier.run()


First few rows of the dataset:
            fruit_label fruit_name fruit_subtype    mass  width  height  \
Unnamed: 0                                                                
0                     1      apple  granny_smith  198.04   8.47    7.29   
1                     1      apple  granny_smith  169.98   7.84    6.41   
2                     1      apple  granny_smith  183.37   7.58    7.62   
3                     2   mandarin      mandarin   88.73   6.31    4.55   
4                     2   mandarin      mandarin   80.54   6.30    4.83   

            color_score  
Unnamed: 0               
0                  0.54  
1                  0.59  
2                  0.60  
3                  0.82  
4                  0.83  
Accuracy: 0.8050847457627118
F1 Score: 0.7987770167287951
Recall: 0.8050847457627118
Precision: 0.8099324774327832
Confusion Matrix:
[[29  0  6  0]
 [ 0  9  0  0]
 [14  0 23  3]
 [ 0  0  0 34]]
Classification Report:
              precision    recall  f1-score 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
