In [1]:
#1.Debug the given code
# Exercise cell: intentionally broken. 'Feature1' holds strings and one of them
# ('Thirty') is not a number, so the integer cast below raises ValueError.
import pandas as pd

data = {'Feature1': ['10', '20', 'Thirty'],  # 'Thirty' is not a valid number
        'Feature2': [5.5, 6.7, 8.9]}

df = pd.DataFrame(data)
df['Feature1'] = df['Feature1'].astype(int)  # Error: Cannot convert 'Thirty' to int

print(df)  # not reached: the cast on the previous line raises first



ValueError: invalid literal for int() with base 10: 'Thirty'

In [2]:
#corrected code
import pandas as pd

# 'Feature1' arrives as text and contains one non-numeric entry ('Thirty').
data = {
    'Feature1': ['10', '20', 'Thirty'],
    'Feature2': [5.5, 6.7, 8.9],
}

df = pd.DataFrame(data)

# errors='coerce' turns unparseable entries into NaN instead of raising, which
# is why the column comes back as float rather than int.
feature1_numeric = pd.to_numeric(df['Feature1'], errors='coerce')
df['Feature1'] = feature1_numeric

print(df)


   Feature1  Feature2
0      10.0       5.5
1      20.0       6.7
2       NaN       8.9


In [None]:
#2.
# Exercise cell: intentionally broken. The result of fillna() is discarded, so
# the frame that gets printed still contains the missing values.
import pandas as pd

data = {'A': [1, 2, None],  # Missing value
        'B': [4, None, 6]}  # Missing value

df = pd.DataFrame(data)
mean_value = df.mean()  # per-column means, used as the fill values
df.fillna(mean_value)  # Error: fillna() does not modify in place

print(df)


In [4]:
#corrected code
import pandas as pd

# Each column has exactly one missing entry.
data = {
    'A': [1, 2, None],
    'B': [4, None, 6],
}

df = pd.DataFrame(data)
mean_value = df.mean()

# fillna() returns a new frame and leaves its input untouched by default, so
# rebind df to the filled result.
df = df.fillna(mean_value)

print(df)


     A    B
0  1.0  4.0
1  2.0  5.0
2  1.5  6.0


In [5]:
#3.
# Exercise cell: intentionally broken. scikit-learn estimators expect X with
# shape (n_samples, n_features); here X is a flat 1-D array.
from sklearn.linear_model import LinearRegression
import numpy as np

X = np.array([1, 2, 3, 4, 5])  # Error: X should be 2D
y = np.array([2, 4, 6, 8, 10])

model = LinearRegression()
model.fit(X, y)  # expected to fail on the 1-D X


NameError: name 'LinearRegression' is not defined

In [7]:
#corrected code
from sklearn.linear_model import LinearRegression
import numpy as np

# Five samples of a single feature, laid out as a (5, 1) column so that X has
# the (n_samples, n_features) shape fit() expects.
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([2, 4, 6, 8, 10])

model = LinearRegression()
model.fit(X, y)

In [None]:
#4.
# Exercise cell: intentionally broken. StandardScaler is given a flat 1-D array
# instead of a 2-D (n_samples, n_features) array.
from sklearn.preprocessing import StandardScaler
import numpy as np

data = np.array([10, 20, 30, 40, 50])  # Error: Should be 2D

scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)  # expected to fail on the 1-D input

print(scaled_data)


In [8]:
#corrected code
from sklearn.preprocessing import StandardScaler
import numpy as np

# StandardScaler standardizes column by column, so the five values are arranged
# as a single feature column of shape (5, 1).
raw_values = np.array([10, 20, 30, 40, 50])
data = raw_values.reshape(-1, 1)

scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

print(scaled_data)


[[-1.41421356]
 [-0.70710678]
 [ 0.        ]
 [ 0.70710678]
 [ 1.41421356]]


In [None]:
#5.
# Exercise cell: the stated problem is that the target holds string labels.
# NOTE(review): scikit-learn classifiers generally accept string class labels,
# so this cell may run without error -- confirm the intended failure.
from sklearn.linear_model import LogisticRegression

X = [[1, 2], [3, 4], [5, 6]]
y = ['yes', 'no', 'yes']  # Error: Labels should be numeric

model = LogisticRegression()
model.fit(X, y)


In [12]:
#corrected code
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

X = [[1, 2], [3, 4], [5, 6]]
y = ['yes', 'no', 'yes']  # string class labels

# LabelEncoder assigns integer codes in sorted label order: 'no' -> 0, 'yes' -> 1.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
print(y_encoded)

model = LogisticRegression()
model.fit(X, y_encoded)

[1 0 1]


In [None]:
#6.
# Exercise cell: intentionally broken. OneHotEncoder expects 2-D input, but
# df['Category'] is a 1-D Series.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'Category': ['A', 'B', 'C', 'A']})

encoder = OneHotEncoder()
encoded = encoder.fit_transform(df['Category'])  # Error: Data should be reshaped
print(encoded)


In [20]:
#corrected code
#6.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'Category': ['A', 'B', 'C', 'A']})

# OneHotEncoder needs 2-D input, so the 1-D column is turned into an
# (n_samples, 1) array first. Selecting df[['Category']] (double brackets) is
# another way to obtain 2-D input.
category_column = df['Category'].values.reshape(-1, 1)

encoder = OneHotEncoder()
encoded = encoder.fit_transform(category_column)

print(encoded)  # sparse result: shown as (row, column) value entries
print(pd.get_dummies(df['Category']))  # pandas alternative: one indicator column per category

  (0, 0)	1.0
  (1, 1)	1.0
  (2, 2)	1.0
  (3, 0)	1.0
       A      B      C
0   True  False  False
1  False   True  False
2  False  False   True
3   True  False  False


In [None]:
#6.
# Exercise cell: intentionally broken. X has 4 samples but y has only 3 labels;
# train_test_split requires X and y to have the same number of samples.
from sklearn.model_selection import train_test_split

X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 1, 0]  # one label short: 3 labels for 4 samples

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [22]:
#corrected code
from sklearn.model_selection import train_test_split

X = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 1, 0, 1]  # one label per sample: len(y) must equal len(X)

# random_state pins the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



In [None]:
#7.
# Exercise cell: the stated problem is that the target holds string labels.
# NOTE(review): scikit-learn classifiers generally accept string class labels,
# so this cell may run without error -- confirm the intended failure.
from sklearn.linear_model import LogisticRegression

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["yes", "no", "yes"]  # Error: LogisticRegression expects numerical labels

model = LogisticRegression()
model.fit(X_train, y_train)


In [24]:
#corrected code
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ['yes', 'no', 'yes']  # string class labels

# LabelEncoder assigns integer codes in sorted label order: 'no' -> 0, 'yes' -> 1.
y_encoded = LabelEncoder().fit_transform(y_train)
print(y_encoded)

model = LogisticRegression()
# Fit on X_train, the features defined in this cell. The name X is not defined
# here and would only resolve to whatever an earlier cell left in the kernel.
model.fit(X_train, y_encoded)

[1 0 1]


In [None]:
#8.
# Exercise cell: intentionally broken. X_train contains a NaN and is passed to
# LinearRegression without any imputation.
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Creating a dataset with missing values
X_train = np.array([[1, 2], [3, np.nan], [5, 6]])
y_train = np.array([10, 20, 30])

model = LinearRegression()
model.fit(X_train, y_train)  # expected to fail: X_train contains NaN


In [26]:
#corrected code
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Feature matrix with one missing entry (second row, second column).
X_train = np.array([[1, 2], [3, np.nan], [5, 6]])
y_train = np.array([10, 20, 30])

# Mean imputation: each NaN is replaced by the mean of its own column,
# computed from the non-missing values.
df = pd.DataFrame(X_train)
column_means = df.mean()
X_train_mean = df.fillna(column_means)

model = LinearRegression()
model.fit(X_train_mean, y_train)

In [None]:
#9.
# Exercise cell: the stated problem is that the target holds string labels.
# NOTE(review): scikit-learn classifiers generally accept string class labels,
# so this cell may run without error -- confirm the intended failure.
from sklearn.tree import DecisionTreeClassifier

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["spam", "ham", "spam"]  # Error: String labels not allowed

model = DecisionTreeClassifier()
model.fit(X_train, y_train)


In [29]:
#corrected code
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

X_train = [[1, 2], [3, 4], [5, 6]]
y_train = ["spam", "ham", "spam"]  # string class labels

# Convert the labels to integer codes; LabelEncoder numbers the classes in
# sorted order, so "ham" -> 0 and "spam" -> 1.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

model = DecisionTreeClassifier()
model.fit(X_train, y_train_encoded)

In [None]:
#10.
# Exercise cell: the code runs, but the two features differ in magnitude by a
# factor of 100 and are fed to SVC without any scaling.
from sklearn.svm import SVC

X_train = [[1, 100], [2, 200], [3, 300]]
y_train = [0, 1, 0]

model = SVC()
model.fit(X_train, y_train)
print(model.predict([[1, 150]]))  # Unreliable output due to large-scale difference


In [31]:
#corrected code
#10.
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

X_train = [[1, 100], [2, 200], [3, 300]]
y_train = [0, 1, 0]

# Keep the fitted scaler: the model only ever sees standardized features, so
# every query must go through the same transformation before predict().
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

model = SVC()
model.fit(X_train_scaled, y_train)

# transform() reuses the mean and scale learned from X_train; passing the raw
# [[1, 150]] would put the query on a different scale from the training data.
X_new_scaled = scaler.transform([[1, 150]])
print(model.predict(X_new_scaled))

[0]
