In [None]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Feature selection using chi-square test
def perform_chi2_feature_selection(X, y, k=5):
    """Reduce ``X`` to the ``k`` features ranked best by the chi-square test.

    Args:
        X: Feature matrix passed to ``SelectKBest.fit_transform``.
        y: Target labels the features are scored against.
        k: How many top-scoring features to keep (default 5).

    Returns:
        The result of ``SelectKBest(chi2, k=k).fit_transform(X, y)``, i.e.
        ``X`` restricted to the selected columns.
    """
    # Build the selector and fit/transform in a single expression.
    return SelectKBest(score_func=chi2, k=k).fit_transform(X, y)

In [None]:
# Split data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42):
    """Partition features and labels into a training part and a testing part.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable (default 42).

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    pieces = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    # Return a tuple so callers can unpack it as
    # X_train, X_test, y_train, y_test.
    return tuple(pieces)

In [None]:
def train_svm(X_train, y_train, C=1.0, kernel='linear'):
    """Fit a support-vector classifier on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        C: Value forwarded to ``SVC`` as its ``C`` argument (default 1.0).
        kernel: Kernel name forwarded to ``SVC`` (default ``'linear'``).

    Returns:
        The fitted ``SVC`` instance.
    """
    # ``fit`` returns the estimator itself, so construct and train in one step.
    return SVC(kernel=kernel, C=C).fit(X_train, y_train)

In [None]:
def evaluate_svm(svm_classifier, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Args:
        svm_classifier: Fitted model exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        A pair ``(accuracy, report)``: the value of ``accuracy_score`` and the
        output of ``classification_report`` for the predictions.
    """
    predictions = svm_classifier.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )

In [None]:
if __name__ == "__main__":
    # Load the dataset into a pandas DataFrame.
    # Adjust the path below if the CSV lives somewhere else.
    data = pd.read_csv('/content/fruit.csv')

    # Separate the target variable from the features: the label column and the
    # two other fruit_* columns are excluded from the feature matrix.
    X = data.drop(columns=['fruit_label', 'fruit_subtype', 'fruit_name'])
    y = data['fruit_label']

    # Step 1: Split the data into training and testing sets FIRST, so the
    # held-out rows cannot influence which features get selected.
    X_train_all, X_test_all, y_train, y_test = split_data(X, y)

    # Step 2: Perform chi-square feature selection, fitted on the training rows
    # only; the same fitted selector is then applied to the test rows.
    k_selected_features = 4   # Adjust this value based on how many top features you want to select
    chi2_selector = SelectKBest(chi2, k=k_selected_features)
    X_train = chi2_selector.fit_transform(X_train_all, y_train)
    X_test = chi2_selector.transform(X_test_all)

    # Step 3: Train the SVM classifier
    svm_classifier = train_svm(X_train, y_train)

    # Step 4: Evaluate the SVM classifier
    accuracy, report = evaluate_svm(svm_classifier, X_test, y_test)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(report)

Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           1       0.67      0.67      0.67         3
           2       1.00      1.00      1.00         2
           3       0.33      0.50      0.40         2
           4       1.00      0.80      0.89         5

    accuracy                           0.75        12
   macro avg       0.75      0.74      0.74        12
weighted avg       0.81      0.75      0.77        12



In [None]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Assuming you have a pandas DataFrame with your data
# X contains the features, and y contains the target variable
# Make sure the target variable is encoded with numerical labels (e.g., 0, 1, 2, ...)

# Step 1: Feature selection using chi-square test
def perform_chi2_feature_selection(X, y, k=5):
    """Select the ``k`` best features of ``X`` according to the chi-square test.

    Args:
        X: Feature matrix passed to ``SelectKBest.fit_transform``.
        y: Target labels the features are scored against.
        k: Number of top-scoring features to retain (default 5).

    Returns:
        The output of ``SelectKBest(chi2, k=k).fit_transform(X, y)``: ``X``
        limited to the selected columns.
    """
    # Fit the selector and project X onto the chosen columns in one call.
    return SelectKBest(score_func=chi2, k=k).fit_transform(X, y)

# Step 2: Split data into training and testing sets
def split_data(X, y, test_size=0.2, random_state=42):
    """Divide features and labels into training and testing subsets.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` for a repeatable
            split (default 42).

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    pieces = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    # Return a tuple so callers can unpack it as
    # X_train, X_test, y_train, y_test.
    return tuple(pieces)

# Step 3: Train SVM classifier
def train_svm(X_train, y_train, C=1.0, kernel='linear'):
    """Train an ``SVC`` model on the supplied training set.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        C: Value forwarded to ``SVC`` as its ``C`` argument (default 1.0).
        kernel: Kernel name forwarded to ``SVC`` (default ``'linear'``).

    Returns:
        The fitted ``SVC`` instance.
    """
    # ``fit`` hands back the estimator, so build and train in a single step.
    return SVC(kernel=kernel, C=C).fit(X_train, y_train)

# Step 4: Evaluate SVM
def evaluate_svm(svm_classifier, X_test, y_test):
    """Measure a fitted classifier's performance on held-out data.

    Args:
        svm_classifier: Fitted model exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels for ``X_test``.

    Returns:
        A pair ``(accuracy, report)``: the value of ``accuracy_score`` and the
        output of ``classification_report`` for the predictions.
    """
    predictions = svm_classifier.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )

if __name__ == "__main__":
    # Load the dataset into a pandas DataFrame.
    # Adjust the path below if the CSV lives somewhere else.
    data = pd.read_csv('/content/fruit.csv')

    # Separate the target variable from the features: the label column and the
    # two other fruit_* columns are excluded from the feature matrix.
    X = data.drop(columns=['fruit_label', 'fruit_subtype', 'fruit_name'])
    y = data['fruit_label']

    # Step 1: Split the data into training and testing sets FIRST, so the
    # held-out rows cannot influence which features get selected.
    X_train_all, X_test_all, y_train, y_test = split_data(X, y)

    # Step 2: Perform chi-square feature selection, fitted on the training rows
    # only; the same fitted selector is then applied to the test rows.
    k_selected_features = 4  # Adjust this value based on how many top features you want to select
    chi2_selector = SelectKBest(chi2, k=k_selected_features)
    X_train = chi2_selector.fit_transform(X_train_all, y_train)
    X_test = chi2_selector.transform(X_test_all)

    # Step 3: Train the SVM classifier
    svm_classifier = train_svm(X_train, y_train)

    # Step 4: Evaluate the SVM classifier
    accuracy, report = evaluate_svm(svm_classifier, X_test, y_test)
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(report)


Accuracy: 0.75
Classification Report:
              precision    recall  f1-score   support

           1       0.67      0.67      0.67         3
           2       1.00      1.00      1.00         2
           3       0.33      0.50      0.40         2
           4       1.00      0.80      0.89         5

    accuracy                           0.75        12
   macro avg       0.75      0.74      0.74        12
weighted avg       0.81      0.75      0.77        12

