<a href="https://colab.research.google.com/github/Srinivas-8612/Machine-Learning/blob/main/ML_LAB1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Dataset: synthetic 3-class problem, 20 features (15 informative, 5 redundant),
# with imbalanced class weights and 1% label noise (flip_y).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    n_classes=3,
    weights=[0.5, 0.3, 0.2],
    class_sep=1.5,
    flip_y=0.01,
    random_state=42,
)

# Hold out 20% of the samples for evaluation.
# NOTE(review): the split is not stratified although the classes are imbalanced;
# passing stratify=y would keep class proportions equal in both splits, but it
# would also change the accuracies recorded below, so it is left as-is here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Scale features for models sensitive to scale. The scaler is fitted on the
# training split only and then reused on the test split to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression
# `multi_class` is intentionally not passed: it is deprecated since
# scikit-learn 1.5, and lbfgs already fits a multinomial model on multiclass
# targets, so the fitted model is the same without the deprecation warning.
lr = LogisticRegression(solver='lbfgs', max_iter=1000)
lr.fit(X_train_scaled, y_train)
print("Logistic Regression Accuracy:", lr.score(X_test_scaled, y_test))

# Decision Tree (trained on the unscaled features)
tree = DecisionTreeClassifier(max_depth=10, random_state=42)
tree.fit(X_train, y_train)
print("Decision Tree Accuracy:", tree.score(X_test, y_test))

# SVM with different kernels (all trained on the scaled features)
svc_linear = SVC(kernel='linear')
svc_linear.fit(X_train_scaled, y_train)
print("Linear SVM Accuracy:", svc_linear.score(X_test_scaled, y_test))

svc_poly = SVC(kernel='poly', degree=3)
svc_poly.fit(X_train_scaled, y_train)
print("Polynomial SVM Accuracy:", svc_poly.score(X_test_scaled, y_test))

svc_rbf = SVC(kernel='rbf')
svc_rbf.fit(X_train_scaled, y_train)
print("RBF SVM Accuracy:", svc_rbf.score(X_test_scaled, y_test))



Logistic Regression Accuracy: 0.89
Decision Tree Accuracy: 0.81
Linear SVM Accuracy: 0.885
Polynomial SVM Accuracy: 0.955
RBF SVM Accuracy: 0.97


In [7]:
from sklearn.datasets import load_wine # Import the real-world dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import numpy as np # Import numpy for better display

# Load the real-world Wine Dataset (178 samples, 13 features, 3 classes)
X_real, y_real = load_wine(return_X_y=True)

# Split the data: 20% held out for evaluation, fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X_real, y_real, test_size=0.2, random_state=0)

# Scale features for models sensitive to scale (Logistic Regression and SVM).
# The scaler is fitted on the training split only and reused on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Maps a human-readable model name to its test-set accuracy.
results = {}

# --- Logistic Regression ---
# `multi_class` is intentionally not passed: it is deprecated since
# scikit-learn 1.5, and lbfgs already fits a multinomial model on multiclass
# targets, so the fitted model is the same without the deprecation warning.
lr = LogisticRegression(solver='lbfgs', max_iter=1000)
lr.fit(X_train_scaled, y_train)
results['Logistic Regression'] = lr.score(X_test_scaled, y_test)

# --- Decision Tree ---
# Note: Decision Tree does not require scaled data
tree = DecisionTreeClassifier(max_depth=10, random_state=42)
tree.fit(X_train, y_train)
results['Decision Tree'] = tree.score(X_test, y_test)

# --- SVM with different kernels (uses scaled data) ---
# Linear SVM
svc_linear = SVC(kernel='linear', random_state=42)
svc_linear.fit(X_train_scaled, y_train)
results['Linear SVM'] = svc_linear.score(X_test_scaled, y_test)

# Polynomial SVM (degree 3)
svc_poly = SVC(kernel='poly', degree=3, random_state=42)
svc_poly.fit(X_train_scaled, y_train)
results['Polynomial SVM'] = svc_poly.score(X_test_scaled, y_test)

# RBF SVM
svc_rbf = SVC(kernel='rbf', random_state=42)
svc_rbf.fit(X_train_scaled, y_train)
results['RBF SVM'] = svc_rbf.score(X_test_scaled, y_test)

# Print all results in insertion order, four decimal places each.
print("Classification Results on Wine Dataset:")
for model, accuracy in results.items():
    print(f"{model} Accuracy: {accuracy:.4f}")

Classification Results on Wine Dataset:
Logistic Regression Accuracy: 1.0000
Decision Tree Accuracy: 0.9722
Linear SVM Accuracy: 1.0000
Polynomial SVM Accuracy: 0.9444
RBF SVM Accuracy: 1.0000




In [8]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 1. Dataset Generation (as per document specifications)
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,               # 15 features contribute to the classification
    n_redundant=5,                  # 5 features are combinations of informative ones
    n_classes=3,
    weights=[0.5, 0.3, 0.2],        # Imbalanced classes
    class_sep=1.5,
    flip_y=0.01,                    # 1% of labels are assigned at random (label noise)
    random_state=42
)

# 2. Data Splitting (20% held out for evaluation)
# NOTE(review): the split is not stratified although the classes are imbalanced;
# stratify=y would preserve class proportions but change the recorded accuracies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=0
)

# 3. Feature Scaling (essential for distance-based models like SVM and regularized models like LR)
# The scaler is fitted on the training split only and reused on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("--- Classification Model Accuracies ---")

# --- Model 1: Logistic Regression (LR) ---
# `multi_class` is intentionally not passed: it is deprecated since
# scikit-learn 1.5, and lbfgs already fits a multinomial model on multiclass
# targets, so the fitted model is the same without the deprecation warning.
lr = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=42  # kept for parity; per the sklearn docs only sag/saga/liblinear use it
)
lr.fit(X_train_scaled, y_train)
lr_accuracy = lr.score(X_test_scaled, y_test)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")

# --- Model 2: Decision Tree (DT) ---
# DT is not sensitive to scaling, so we use the unscaled data.
tree = DecisionTreeClassifier(
    max_depth=10,
    random_state=42
)
tree.fit(X_train, y_train)
tree_accuracy = tree.score(X_test, y_test)
print(f"Decision Tree Accuracy: {tree_accuracy:.4f}")

# --- Model 3: Support Vector Machine (SVM) ---
# All SVM models use the scaled data (X_train_scaled, X_test_scaled).

# 3a. Linear SVM
svc_linear = SVC(kernel='linear', random_state=42)
svc_linear.fit(X_train_scaled, y_train)
svc_linear_accuracy = svc_linear.score(X_test_scaled, y_test)
print(f"Linear SVM Accuracy:          {svc_linear_accuracy:.4f}")

# 3b. Polynomial Kernel SVM (degree 3)
svc_poly = SVC(kernel='poly', degree=3, random_state=42)
svc_poly.fit(X_train_scaled, y_train)
svc_poly_accuracy = svc_poly.score(X_test_scaled, y_test)
print(f"Polynomial SVM Accuracy:      {svc_poly_accuracy:.4f}")

# 3c. RBF Kernel SVM
svc_rbf = SVC(kernel='rbf', random_state=42)
svc_rbf.fit(X_train_scaled, y_train)
svc_rbf_accuracy = svc_rbf.score(X_test_scaled, y_test)
print(f"RBF SVM Accuracy:             {svc_rbf_accuracy:.4f}")

--- Classification Model Accuracies ---
Logistic Regression Accuracy: 0.8900
Decision Tree Accuracy: 0.8100
Linear SVM Accuracy:          0.8850




Polynomial SVM Accuracy:      0.9550
RBF SVM Accuracy:             0.9700


In [9]:
import joblib

In [10]:
# Serialize the fitted estimators to .joblib files in the current working
# directory. Only the RBF SVM is stored here; the linear and polynomial SVMs
# could be dumped the same way if they are needed later.
joblib.dump(value=lr, filename='logistic_regression_model.joblib')
joblib.dump(value=tree, filename='decision_tree_model.joblib')
joblib.dump(value=svc_rbf, filename='rbf_svm_model.joblib')

['rbf_svm_model.joblib']

In [11]:
# `google.colab` is provided by the Google Colab runtime; this cell is not
# expected to import on a plain local Jupyter kernel.
from google.colab import drive
# Mount Google Drive under /content/drive.
# NOTE(review): the joblib.dump calls earlier in this notebook write to relative
# paths and run before this mount, so the model files do not appear to be saved
# under /content/drive -- confirm whether they should be written or copied there.
drive.mount('/content/drive')

Mounted at /content/drive
