In [8]:
import pandas as pd;
import numpy as np;

In [9]:
df = pd.read_csv('covid_data_100rows.csv')
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,58,Female,103.6,Severe,Chennai,No
1,71,Male,103.8,Severe,Chennai,No
2,48,Female,99.0,Mild,Chennai,Yes
3,34,Male,99.1,Mild,Mumbai,Yes
4,62,Female,100.4,Severe,Chennai,Yes


In [10]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    df.drop('has_covid', axis=1), 
    df['has_covid'], 
    test_size=0.3, 
    random_state=1
)
x_train.shape, x_test.shape

((70, 5), (30, 5))

In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer

transformer = ColumnTransformer(
    transformers=[
        ('tnf1', SimpleImputer(), ['fever']),               # Fix 1: column name must match exactly
        ('tnf2', OrdinalEncoder(categories=[['Mild', 'Severe']]), ['cough']),  # Fix 2: correct encoder name
        ('tnf3', OneHotEncoder(), ['city'])                                # Fix 3: specify column for OHE
    ],
    remainder='passthrough'
)


In [13]:
transformer.fit_transform(x_train)

array([[97.6, 1.0, 0.0, 0.0, 1.0, 0.0, 39, 'Male'],
       [103.8, 0.0, 0.0, 0.0, 1.0, 0.0, 27, 'Male'],
       [101.7, 1.0, 1.0, 0.0, 0.0, 0.0, 28, 'Male'],
       [99.9, 1.0, 0.0, 1.0, 0.0, 0.0, 33, 'Female'],
       [99.3, 1.0, 0.0, 1.0, 0.0, 0.0, 48, 'Female'],
       [100.9, 0.0, 0.0, 1.0, 0.0, 0.0, 79, 'Male'],
       [99.6, 0.0, 0.0, 1.0, 0.0, 0.0, 76, 'Male'],
       [100.35593220338983, 1.0, 0.0, 0.0, 1.0, 0.0, 79, 'Male'],
       [100.0, 0.0, 1.0, 0.0, 0.0, 0.0, 63, 'Male'],
       [100.3, 0.0, 0.0, 0.0, 0.0, 1.0, 44, 'Male'],
       [99.1, 0.0, 0.0, 0.0, 0.0, 1.0, 23, 'Male'],
       [98.5, 1.0, 0.0, 0.0, 1.0, 0.0, 40, 'Female'],
       [100.1, 1.0, 0.0, 1.0, 0.0, 0.0, 36, 'Male'],
       [100.8, 0.0, 0.0, 0.0, 1.0, 0.0, 55, 'Female'],
       [103.0, 0.0, 0.0, 0.0, 1.0, 0.0, 25, 'Female'],
       [101.0, 1.0, 0.0, 1.0, 0.0, 0.0, 35, 'Male'],
       [98.3, 1.0, 0.0, 0.0, 1.0, 0.0, 63, 'Male'],
       [103.8, 0.0, 0.0, 1.0, 0.0, 0.0, 63, 'Female'],
       [101.7, 1.0, 1.0, 0.0