In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Create the dataset
# Toy customer table with ten rows: two categorical columns (Country,
# Purchased) and two numeric columns (Age, Salary).
data = {
    'Country': ['France', 'Spain', 'Germany', 'Spain', 'Germany', 
               'France', 'Spain', 'France', 'Germany', 'France'],
    'Age': [44, 27, 30, 38, 40, 35, 31, 48, 50, 37],
    'Salary': [72000, 48000, 54000, 61000, 85000, 
              58000, 52000, 79000, 83000, 67000],
    'Purchased': ['No', 'Yes', 'No', 'No', 'Yes', 
                 'Yes', 'No', 'Yes', 'No', 'Yes']
}
df = pd.DataFrame(data)

# Method 1: One-Hot Encoding (Dummy Variables)
# get_dummies replaces each listed column with one indicator column per
# category. 'Purchased' only has two values, so Purchased_No and
# Purchased_Yes are exact complements of each other.
print("One-Hot Encoded Data:")
df_onehot = pd.get_dummies(df, columns=['Country', 'Purchased'])
print(df_onehot)

# Method 2: Label Encoding
# Every column gets its own fitted encoder, kept in `label_encoders`, so the
# integer codes can be turned back into labels later with
# label_encoders[col].inverse_transform(...). Re-fitting a single shared
# encoder would overwrite the Country classes with the Purchased classes.
print("\nLabel Encoded Data:")
df_label = df.copy()
label_encoders = {}
for column in ['Country', 'Purchased']:
    le = LabelEncoder()
    df_label[column] = le.fit_transform(df_label[column])
    label_encoders[column] = le
print(df_label)

# Method 3: Ordinal Encoding (Manual mapping)
# Here the integer for each category is chosen by hand in the dicts below
# rather than derived by an encoder.
print("\nOrdinal Encoded Data:")
df_ordinal = df.copy()
country_mapping = {'France': 0, 'Spain': 1, 'Germany': 2}
purchased_mapping = {'No': 0, 'Yes': 1}
df_ordinal['Country'] = df_ordinal['Country'].map(country_mapping)
df_ordinal['Purchased'] = df_ordinal['Purchased'].map(purchased_mapping)
# Series.map produces NaN for any value missing from the mapping dict; fail
# loudly here instead of letting NaN flow into later steps.
assert df_ordinal[['Country', 'Purchased']].notna().all().all(), \
    "unmapped category in Country/Purchased"
print(df_ordinal)

One-Hot Encoded Data:
   Age  Salary  Country_France  Country_Germany  Country_Spain  Purchased_No  \
0   44   72000            True            False          False          True   
1   27   48000           False            False           True         False   
2   30   54000           False             True          False          True   
3   38   61000           False            False           True          True   
4   40   85000           False             True          False         False   
5   35   58000            True            False          False         False   
6   31   52000           False            False           True          True   
7   48   79000            True            False          False         False   
8   50   83000           False             True          False          True   
9   37   67000            True            False          False         False   

   Purchased_Yes  
0          False  
1           True  
2          False  
3          False  
4 

In [1]:
# Import libraries
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Fetch iris as (features, labels) and collapse the three classes into a
# one-vs-rest target: label 2 (Virginica) becomes 1, every other label 0.
X, y = load_iris(return_X_y=True)
is_virginica = (y == 2)
y_binary = is_virginica.astype(int)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(
    X,
    y_binary,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Fit a logistic-regression classifier on the training portion.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Score the held-out rows: overall accuracy first, then the per-class report.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy:.2f}")
print(classification_report(y_test, y_pred))

Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

