### Task 1: Non-Linear Dataset

#### Importing libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

#### Loading datasets

In [None]:
# Load the samples in one pass:
#   1. read the space-separated text file, dropping its first line
#      (meta-information, not a sample);
#   2. keep only the two leading columns, which hold the features;
#   3. force every value to a number -- anything unparsable becomes NaN.
df = (
    pd.read_csv(
        "../Datasets/NonLinearDataset/NonLinearDataset.txt",
        sep=" ",
        header=None,
        skiprows=1,
    )
    .iloc[:, :2]
    .apply(pd.to_numeric, errors='coerce')
)

#### Assigning class labels and splitting into training and testing data

In [None]:
def _split_per_class_in_order(labels, train_fraction=0.7):
    """Return a boolean mask that selects the training rows.

    For every distinct label, the first ``int(train_fraction * count)`` rows
    carrying that label (in their original order) are marked as training
    rows; the remaining rows of that label are left for testing.
    """
    train_mask = np.zeros(len(labels), dtype=bool)
    for label in np.unique(labels):
        rows = np.flatnonzero(labels == label)
        train_mask[rows[: int(train_fraction * len(rows))]] = True
    return train_mask


# Assign class labels from the row position. The rows are grouped by class:
# the first 500 rows are the first class (label 0), the next 500 the second
# class (label 1), and the remaining 1000 the third class (label 2).
df['class'] = np.concatenate([np.zeros(500), np.ones(500), np.full(1000, 2)])

# Split the data into features (X) and integer labels (y)
X = df.iloc[:, :2].values          # First two columns as features
y = df['class'].values.astype(int) # 'class' column as integer target labels

# Split 70% / 30% *within each class*, keeping the given row order.
# Because the rows are sorted by class, a single cut at 70% of the whole
# array would send every row of classes 0 and 1 to training and leave a
# test set made only of class 2, so the split is done class by class.
train_mask = _split_per_class_in_order(y, train_fraction=0.7)
X_train, X_test = X[train_mask], X[~train_mask]
y_train, y_test = y[train_mask], y[~train_mask]

# Number of training rows (kept under its original name for later cells)
train_size = len(y_train)

# Report the resulting shapes and per-class sample counts
print("Training Data Shape:", X_train.shape)
print("Testing Data Shape:", X_test.shape)
print("Class Distribution in Training Data:", np.bincount(y_train))
print("Class Distribution in Testing Data:", np.bincount(y_test))

# Group the samples by class: element i holds the rows whose label is i
train_data_list = [X_train[y_train == i] for i in range(3)]
test_data_list = [X_test[y_test == i] for i in range(3)]

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import PolynomialFeatures  # Import for feature transformation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Load the samples: skip the first line of the file (meta-information), keep
# the two feature columns, and coerce every value to numeric (bad entries
# become NaN).
df = (
    pd.read_csv("../Datasets/NonLinearDataset/NonLinearDataset.txt", sep=" ", header=None, skiprows=1)
    .iloc[:, :2]
    .apply(pd.to_numeric, errors='coerce')
)

# Label the rows by position: 500 rows of 0, then 500 rows of 1, then 1000
# rows of 2 (stored as floats in the frame, exactly as before).
df['class'] = np.repeat([0.0, 1.0, 2.0], [500, 500, 1000])

# Feature matrix from the first two columns; label vector cast to int.
X = df.iloc[:, :2].values
y = df['class'].values.astype(int)

# 70/30 split, stratified on y so both subsets keep the class proportions;
# the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Debugging aid: per-class sample counts in each subset.
print("Class Distribution in Training Data:", np.bincount(y_train))
print("Class Distribution in Testing Data:", np.bincount(y_test))

# Feature transformer that expands the inputs into polynomial terms up to
# degree 3.
poly = PolynomialFeatures(degree=3)