In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import seaborn as sns


In [3]:
df=sns.load_dataset('iris')
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [4]:
selected_features = ['sepal_length', 'sepal_width','petal_length','petal_width']
X = df[selected_features]
y = df['species']

display(X)
display(y)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


Unnamed: 0,species
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=selected_features)

X_categorical = pd.get_dummies(df['species'], prefix='species')

X_transformed = pd.concat([X_scaled_df, X_categorical], axis=1)

display(X_transformed)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species_setosa,species_versicolor,species_virginica
0,-0.900681,1.019004,-1.340227,-1.315444,True,False,False
1,-1.143017,-0.131979,-1.340227,-1.315444,True,False,False
2,-1.385353,0.328414,-1.397064,-1.315444,True,False,False
3,-1.506521,0.098217,-1.283389,-1.315444,True,False,False
4,-1.021849,1.249201,-1.340227,-1.315444,True,False,False
...,...,...,...,...,...,...,...
145,1.038005,-0.131979,0.819596,1.448832,False,False,True
146,0.553333,-1.282963,0.705921,0.922303,False,False,True
147,0.795669,-0.131979,0.819596,1.053935,False,False,True
148,0.432165,0.788808,0.933271,1.448832,False,False,True


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.preprocessing import LabelEncoder

In [7]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(X_transformed, y_encoded, test_size=0.2, random_state=42) # Changed to use y_encoded

model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train_encoded)

y_pred_encoded = model.predict(X_test)

accuracy = accuracy_score(y_test_encoded, y_pred_encoded)
f1_score=f1_score(y_test_encoded, y_pred_encoded, average='weighted')
precision_score=precision_score(y_test_encoded, y_pred_encoded, average='weighted')
print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1_score}')
print(f'Precision Score: {precision_score}')
print(classification_report(y_test_encoded, y_pred_encoded, target_names=le.classes_))

Accuracy: 1.0
F1 Score: 1.0
Precision Score: 1.0
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [8]:
df.info()

display(df.describe())

print(f"Number of unique values in 'target': {df['species'].nunique()}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


Number of unique values in 'target': 3


In [9]:
selected_features_iris = [col for col in df.columns if col != 'species']

X = df[selected_features_iris]
y = df['species']

# Display the first few rows of X and y
display(X.head())
display(y.head())

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


Unnamed: 0,species
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa


In [10]:
from sklearn.preprocessing import StandardScaler

# Check for missing values
missing_values = X.isnull().sum()
print("Missing values per column:")
print(missing_values)


scaler_iris = StandardScaler()
X_scaled = scaler_iris.fit_transform(X)

X_transformed = pd.DataFrame(X_scaled, columns=X.columns)

display(X_transformed.head())

Missing values per column:
sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
dtype: int64


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print('Accuracy:',accuracy)
print('precision score:',precision_score)
print('f1 score:',f1_score)
print(classification_report(y_test, y_pred))

Accuracy: 1.0
precision score: 1.0
f1 score: 1.0
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

