In [2]:
import pandas as pd

df = pd.read_excel("Datasets/Raisin_Dataset.xlsx")
df.sample(5)

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
844,96762,457.477488,277.38678,0.795206,101717,0.788683,1248.75,Besni
537,82853,430.114997,251.1757,0.811773,85292,0.746437,1139.84,Besni
475,177264,619.958472,366.76324,0.806237,180994,0.683219,1652.694,Besni
664,104669,546.672756,248.527908,0.890686,110984,0.687346,1398.545,Besni
165,59970,353.001625,219.682942,0.782756,61834,0.755823,978.631,Kecimen


In [3]:
X = df[["Area", "MajorAxisLength", "MinorAxisLength", "Eccentricity", "ConvexArea", "Extent", "Perimeter"]]
y = df["Class"]

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [4]:
from sklearn.svm import SVC

model = SVC(kernel="rbf")
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)
print(report)

model.n_iter_

              precision    recall  f1-score   support

       Besni       0.86      0.75      0.80        83
     Kecimen       0.81      0.90      0.85        97

    accuracy                           0.83       180
   macro avg       0.83      0.82      0.82       180
weighted avg       0.83      0.83      0.83       180



array([229], dtype=int32)

# **ENSEMBLE METHOD**

In [5]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Create different models
log_model = LogisticRegression()
dt_model = DecisionTreeClassifier()
svm_model = SVC(probability=True)

# Create a voting classifier
voting_clf = VotingClassifier(
    estimators=[('lr', log_model), ('dt', dt_model), ('svm', svm_model)],
    voting='hard')  # Use 'soft' for soft voting

voting_clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


This `VotingClassifier` combines multiple models (`log_model`, `dt_model`, and `svm_model`) to make a final prediction. 

### What it does:
- **`estimators`**: These are the individual models being combined: logistic regression (`lr`), decision tree (`dt`), and support vector machine (`svm`).
- **`voting='hard'`**: Uses majority voting (chooses the class predicted by the majority of models).

The idea is to create a more robust model by combining the strengths of multiple classifiers.

If you use **voting='soft'** in a VotingClassifier, the classifier will predict the class label based on the average of predicted probabilities from each model (instead of the majority vote like in voting='hard').

In [6]:
y_pred = voting_clf.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

       Besni       0.91      0.83      0.87        83
     Kecimen       0.87      0.93      0.90        97

    accuracy                           0.88       180
   macro avg       0.89      0.88      0.88       180
weighted avg       0.88      0.88      0.88       180



## **Ensemble on REGRESSION PROBLEM**

In [7]:
import pandas as pd

df = pd.read_csv("Datasets/regression_home_prices.csv")
df.head()

Unnamed: 0,area_sqr_ft,price_lakhs,bedrooms
0,656.0,39.0,2
1,1260.0,83.2,2
2,1057.0,86.6,3
3,1259.0,59.0,2
4,1800.0,140.0,3


In [8]:
from sklearn.model_selection import train_test_split

X = df[["area_sqr_ft","bedrooms"]]
y = df["price_lakhs"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [9]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)
model.score(X_test, y_test)

0.8874887686858771

In [10]:
model.predict(X_test[:3])

array([79.15940002, 70.96469522, 63.51496358])

In [11]:
y_test[:3].to_list()

[68.0, 80.1, 69.0]

In [12]:
from sklearn.ensemble import VotingRegressor

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor

# Create different regression models
lin_reg = LinearRegression()
ridge_reg = Ridge(alpha=1.0)
dt_reg = DecisionTreeRegressor()


vr = VotingRegressor(estimators=[
    ('lr', lin_reg),
    ('rr', ridge_reg),
    ('dr', dt_reg)
])

vr.fit(X_train, y_train)
vr.score(X_test, y_test)

0.870340859079508

In [13]:
vr.predict(X_test[:3])

array([80.22148778, 70.08163805, 65.10601709])

In [14]:
y_test[:3].to_list()

[68.0, 80.1, 69.0]