# Import libraries


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import itertools


In [None]:

# -------------------------
# Load and clean the data
# -------------------------
df = pd.read_csv("penguins.csv")

# Impute missing numeric measurements with the column mean. The mean is taken
# over the whole file, before any rows are dropped below.
for _col in ("CulmenLength", "CulmenDepth", "BodyMass", "FlipperLength"):
    df[_col] = df[_col].fillna(df[_col].mean())

# Encode the text column OriginLocation as integer codes.
le = LabelEncoder()
df["OriginLocation"] = le.fit_transform(df["OriginLocation"])

# Keep the first 150 rows (3 classes x 50 rows per class).
N_CLASSES = 3
ROWS_PER_CLASS = 50
df = df.iloc[:N_CLASSES * ROWS_PER_CLASS].reset_index(drop=True)

# Give every block of 50 consecutive rows its class id (0, 1, 2).
# NOTE(review): this relies on the CSV being ordered by class in blocks of
# 50 rows — confirm against the data file.
df["Target"] = np.repeat(np.arange(N_CLASSES), ROWS_PER_CLASS)

# -------------------------
# Feature names
# -------------------------
feature_names = ["CulmenLength", "CulmenDepth", "FlipperLength", "BodyMass", "OriginLocation"]

# -------------------------
# signum & perceptron
# -------------------------
def signum(x):
    """Return +1 when ``x >= 0`` and -1 otherwise.

    Zero maps to +1. Any value for which ``x >= 0`` is false (NaN included)
    maps to -1.
    """
    return 1 if x >= 0 else -1


def perceptron_train(X, T, eta=0.01, epochs=20, random_state=None):
    """Train a single perceptron with a signum activation.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        T: 1-D array-like of n_samples targets, each +1 or -1.
        eta: Learning rate applied to every correction.
        epochs: Maximum number of passes over the samples.
        random_state: Optional seed for the initial weights and bias. When
            ``None`` (the default) NumPy's global random state is used, so
            existing callers that rely on ``np.random.seed`` are unaffected.

    Returns:
        Tuple ``(W, b)``: the weight vector (length n_features) and the bias.

    Raises:
        ValueError: If ``T`` does not hold exactly one target per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    T = np.asarray(T)
    n_samples, n_features = X.shape
    if len(T) != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but T has {len(T)} targets"
        )

    # RandomState(seed) produces the same stream as np.random.seed(seed), so
    # both ways of seeding give identical initial weights.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    W = rng.randn(n_features)
    b = rng.randn()

    for _ in range(epochs):
        n_updates = 0
        for x_i, t_i in zip(X, T):
            y_i = signum(np.dot(W, x_i) + b)
            if y_i != t_i:
                step = eta * (t_i - y_i)
                W += step * x_i
                b += step
                n_updates += 1
        # An epoch without corrections means later epochs cannot change the
        # weights either, so stopping here returns the same result sooner.
        if n_updates == 0:
            break
    return W, b

# -------------------------
# All feature combinations (5 choose 2 = 10)
# -------------------------
feature_pairs = list(itertools.combinations(feature_names, 2))

# -------------------------
# Class pairs (0-1, 0-2, 1-2)
# -------------------------
class_pairs = list(itertools.combinations([0, 1, 2], 2))

# perceptron_train draws its initial weights from NumPy's global random
# state. Seed it once so the printed accuracies are repeatable, in line with
# the fixed random_state already used for the train/test splits.
np.random.seed(42)

for (c1, c2) in class_pairs:
    subset = df[df["Target"].isin([c1, c2])]
    print(f"\nDataset: class_{c1}_{c2}")

    rows_c1 = subset[subset["Target"] == c1]
    rows_c2 = subset[subset["Target"] == c2]

    # Randomly pick 30 training rows per class; the rest of each class is
    # held out for testing.
    train_c1 = rows_c1.sample(n=30, random_state=42)
    test_c1 = rows_c1.drop(train_c1.index)
    train_c2 = rows_c2.sample(n=30, random_state=42)
    test_c2 = rows_c2.drop(train_c2.index)

    # Shuffle so the two classes are interleaved.
    train_df = pd.concat([train_c1, train_c2]).sample(frac=1, random_state=42).reset_index(drop=True)
    test_df = pd.concat([test_c1, test_c2]).sample(frac=1, random_state=42).reset_index(drop=True)

    # Map the target to +1 (class c1) and -1 (class c2).
    train_df["Target"] = np.where(train_df["Target"] == c1, 1, -1)
    test_df["Target"] = np.where(test_df["Target"] == c1, 1, -1)

    # The targets do not depend on the chosen features, so extract them once.
    T_train = train_df["Target"].values
    T_test = test_df["Target"].values

    # -------------------------
    # Try every pair of features
    # -------------------------
    for f1, f2 in feature_pairs:
        X_train = train_df[[f1, f2]].values.astype(float)
        X_test = test_df[[f1, f2]].values.astype(float)

        W, b = perceptron_train(X_train, T_train, eta=0.01, epochs=10)

        preds = np.array([signum(np.dot(W, x_i) + b) for x_i in X_test])
        acc = np.mean(preds == T_test)

        print(f"   → Features: ({f1}, {f2}) | Test Accuracy: {acc*100:.2f}%")


# Read CSV

In [None]:
# Load the penguins dataset from the CSV file into a DataFrame.
dataFrame = pd.read_csv("penguins.csv")

# Print the first 51 rows of data

In [None]:
# Preview the first 51 rows of the dataset.
dataFrame.head(51)

In [None]:
# Show the last rows of the dataset (tail() defaults to five rows).
dataFrame.tail()

In [None]:
# Summarise the columns: dtypes and non-null counts.
dataFrame.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the columns.
dataFrame.describe()

In [None]:
# (number of rows, number of columns) of the dataset.
dataFrame.shape

In [None]:
# Count the missing values in each column.
dataFrame.isnull().sum()

In [None]:
# Replace the gaps in each numeric measurement column with that column's mean.
for numeric_col in ('CulmenLength', 'CulmenDepth', 'BodyMass', 'FlipperLength'):
    col_mean = dataFrame[numeric_col].mean()
    dataFrame[numeric_col] = dataFrame[numeric_col].fillna(col_mean)

In [None]:
# Count the missing values per column again, after the mean imputation.
dataFrame.isnull().sum()

In [None]:
# Turn the text column OriginLocation into integer codes, keep them in a new
# column, and print the original values next to their codes.
le = LabelEncoder()
encoded_locations = le.fit_transform(dataFrame['OriginLocation'])
dataFrame['OriginLocation_Encoded'] = encoded_locations
location_columns = ['OriginLocation', 'OriginLocation_Encoded']
print(dataFrame[location_columns])

# Perceptron

In [16]:
dataFrame = pd.read_csv("penguins.csv")

# Numeric measurement columns handled in this cell.
columns = ['CulmenLength', 'CulmenDepth', 'BodyMass', 'FlipperLength']

# Mean-impute the missing values of every measurement column.
for name in columns:
    dataFrame[name] = dataFrame[name].fillna(dataFrame[name].mean())

# Report and cap outliers using the 1.5 * IQR fences.
for name in columns:
    values = dataFrame[name]
    q1 = np.percentile(values, 25)
    q3 = np.percentile(values, 75)
    whisker = 1.5 * (q3 - q1)
    lower_fence = q1 - whisker
    upper_fence = q3 + whisker

    # Boolean masks of the rows lying outside each fence.
    below = values < lower_fence
    above = values > upper_fence

    outliers = int(below.sum()) + int(above.sum())
    print(f"The number of outliers in {name}: {outliers}")

    # Pull the outliers back onto the nearest fence.
    dataFrame[name] = np.where(below, lower_fence, values)
    dataFrame[name] = np.where(dataFrame[name] > upper_fence, upper_fence, dataFrame[name])

The number of outliers in CulmenLength: 0
The number of outliers in CulmenDepth: 0
The number of outliers in BodyMass: 0
The number of outliers in FlipperLength: 0


In [None]:
import pandas as pd
import numpy as np

# ----------------------------
# Load the data
# ----------------------------
df = pd.read_csv("penguins.csv")

# The two measurements used as model inputs.
features = ["CulmenLength", "FlipperLength"]

# Fill missing measurements with the column mean so that no NaN reaches the
# perceptron: a NaN activation is classified as -1 by signum, and a NaN input
# turns the weights into NaN on the first correction.
for col in features:
    df[col] = df[col].fillna(df[col].mean())

# Keep two classes only (Gentoo and Adelie). Copy the selection so the new
# column below is written to an independent frame, not to a slice.
df = df[df["Species"].isin(["Gentoo", "Adelie"])].copy()

# Encode the class as a number: Gentoo = +1, Adelie = -1
df["Target"] = np.where(df["Species"] == "Gentoo", 1, -1)

# ----------------------------
# Split: 30 random rows of each class for training, the rest for testing
# ----------------------------
class1 = df[df["Target"] == 1]
class2 = df[df["Target"] == -1]

# 30 random rows from each class
train_class1 = class1.sample(n=30, random_state=42)
test_class1 = class1.drop(train_class1.index)

train_class2 = class2.sample(n=30, random_state=42)
test_class2 = class2.drop(train_class2.index)

# Merge the classes into shuffled train and test frames
train_df = pd.concat([train_class1, train_class2]).sample(frac=1, random_state=42).reset_index(drop=True)
test_df = pd.concat([test_class1, test_class2]).sample(frac=1, random_state=42).reset_index(drop=True)

# Separate the features from the target
X_train = train_df[features].values.astype(float)
T_train = train_df["Target"].values

X_test = test_df[features].values.astype(float)
T_test = test_df["Target"].values

print("Train size:", len(X_train))
print("Test size:", len(X_test))

# ----------------------------
# signum function
# ----------------------------
def signum(x):
    """Return +1 when ``x >= 0`` and -1 otherwise.

    Zero maps to +1. Any value for which ``x >= 0`` is false (NaN included)
    maps to -1.
    """
    return 1 if x >= 0 else -1

# ----------------------------
# Perceptron algorithm
# ----------------------------
def perceptron_train(X, T, eta=0.01, epochs=20, random_state=None):
    """Train a single perceptron with a signum activation.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        T: 1-D array-like of n_samples targets, each +1 or -1.
        eta: Learning rate applied to every correction.
        epochs: Maximum number of passes over the samples.
        random_state: Optional seed for the initial weights and bias. When
            ``None`` (the default) NumPy's global random state is used, so
            existing callers that rely on ``np.random.seed`` are unaffected.

    Returns:
        Tuple ``(W, b)``: the weight vector (length n_features) and the bias.

    Raises:
        ValueError: If ``T`` does not hold exactly one target per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    T = np.asarray(T)
    n_samples, n_features = X.shape
    if len(T) != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but T has {len(T)} targets"
        )

    # RandomState(seed) produces the same stream as np.random.seed(seed), so
    # both ways of seeding give identical initial weights.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    W = rng.randn(n_features)
    b = rng.randn()

    for epoch in range(epochs):
        corrected = False
        for x_i, t_i in zip(X, T):
            y_i = signum(np.dot(W, x_i) + b)
            if y_i != t_i:
                # Error term: +2 or -2 for a misclassified +/-1 target.
                L = (t_i - y_i)
                W = W + eta * L * x_i
                b = b + eta * L
                corrected = True
        # An epoch without corrections means later epochs cannot change the
        # weights either, so stopping here returns the same result sooner.
        if not corrected:
            break
    return W, b

# ----------------------------
# Train the model
# ----------------------------
# perceptron_train draws its initial weights from NumPy's global random
# state. Seed it so the reported weights and accuracies are repeatable, in
# line with the fixed random_state used for the train/test split.
np.random.seed(42)
W, b = perceptron_train(X_train, T_train, eta=0.01, epochs=10)

# ----------------------------
# Evaluate the model
# ----------------------------
pred_train = np.array([signum(np.dot(W, x_i) + b) for x_i in X_train])
pred_test = np.array([signum(np.dot(W, x_i) + b) for x_i in X_test])

# Accuracy is the fraction of predictions equal to the target.
train_acc = np.mean(pred_train == T_train)
test_acc = np.mean(pred_test == T_test)

print("\nFinal Weights:", W)
print("Final Bias:", b)
print(f"Train Accuracy: {train_acc * 100:.2f}%")
print(f"Test Accuracy: {test_acc * 100:.2f}%")

name1 = 'Adelie'
name2 = 'Gentoo'
# NOTE(review): df was filtered to the Gentoo/Adelie rows earlier in this
# cell, so these positional slices do not address the full dataset — confirm
# which frame they are meant to slice.
# First 100 rows
df_1 = df.iloc[:100]
# Rows from position 50 to the end (originally described as the last 100 rows)
df_2 = df.iloc[50:]
# First 50 rows plus everything from position 100 onward. df_3 is only
# defined when the selected pair is Adelie/Gentoo.
if name1 == "Adelie" and name2 == "Gentoo":
    df_3 = pd.concat([df.iloc[:50], df.iloc[100:]])









In [None]:
# Species column of the dataset.
Y = dataFrame['Species']

# Number of leading columns to pair up: every column except the last two.
numberOfColumns = len(dataFrame.columns) - 2

# Preview the first rows of every unordered pair of those columns, selected
# by position.
for i, j in itertools.combinations(range(numberOfColumns), 2):
    X = dataFrame.iloc[:, [i, j]]
    print(f"Combination: ({i}, {j})")
    print(X.head())

