## Task 1: Dataset Selection

Data Preprocessing:
- Use visualizations (histograms, scatter plots, bar graphs, etc.) to understand the
distribution of features and identify any potential patterns/dependencies or
outliers.
- Identify the data types of each feature (numeric, categorical, text, etc.). For
numeric data, show its characteristics like mean, median, standard deviation, etc.
- Identify and handle missing values (null values) in the data. This could involve
removing rows with missing values, fixing missing values with appropriate
strategies, etc.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import random

### Loading Data

In [None]:
# Read the dataset from a CSV file given by a relative path. The bare `df` on
# the last line makes the notebook render the frame as the cell output.
df = pd.read_csv("HCV-Egy-Data.csv")
df

### Data Exploration

In [None]:
# Draw histograms of the frame's columns on one large (40 x 20 inch) figure to
# get a first look at how each feature is distributed.
df.hist(figsize=(40,20))

In [None]:
# Pairwise correlation matrix of the columns, drawn as an annotated heatmap
# (values shown with two decimals) on a large square canvas.
# NOTE(review): df.corr() assumes every column is numeric; recent pandas
# versions raise on non-numeric columns unless numeric_only=True - confirm.
plt.figure(figsize=(28, 28))  # Adjust the width and height as needed

sns.heatmap(df.corr(), cmap='Reds', annot=True, fmt='.2f')

In [None]:
# Count the missing (null) values in each column.
df.isnull().sum()

In [None]:
# One subplot per column, stacked vertically. The count is taken from the
# frame itself so the grid always matches the data instead of relying on a
# hard-coded 29.
num_features = len(df.columns)
fig, axes = plt.subplots(nrows=num_features, ncols=1, figsize=(10, 5 * num_features), squeeze=False)
# squeeze=False always returns a 2-D array (even for a single column);
# flatten it so axes[i] is the Axes of the i-th column.
axes = axes.ravel()

# Loop through each feature to create a PDF plot
for i, column in enumerate(df.columns):
    ax = axes[i]
    series = df[column]

    # Plotting the estimated density
    series.plot(kind='density', ax=ax, color='blue', alpha=0.5, label='PDF')

    # Calculate mean, median, and mode
    mean = series.mean()
    median = series.median()
    mode = series.mode()[0]  # mode() can return several values; use the first

    # Marking mean, median, and mode with vertical dashed lines
    ax.axvline(x=mean, color='red', linestyle='--', label=f'Mean: {mean:.2f}')
    ax.axvline(x=median, color='green', linestyle='--', label=f'Median: {median:.2f}')
    ax.axvline(x=mode, color='purple', linestyle='--', label=f'Mode: {mode:.2f}')

    # Adding titles and labels
    ax.set_title(f'Probability Density Function for {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Density')
    ax.legend()
    ax.grid()

# Adjust layout and show the plot
plt.tight_layout()
plt.show()

In [None]:
# Lift pandas' display row limit so long outputs are printed in full. This is
# a global option and stays in effect for the cells that follow.
pd.set_option('display.max_rows', None)
# Data type of every column.
df.dtypes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# One box plot per column, arranged in a 10 x 3 grid with independent axes
# (sharex/sharey disabled), to spot outliers in each feature.
df.plot(kind='box', subplots=True, sharex=False, sharey=False, layout=(10,3), figsize=(15,25))

In [None]:
# Trim outliers with the 1.5 * IQR rule, first on 'ALT after 24 w' and then on
# 'RNA 12'. The second pass computes its quartiles on the already-trimmed frame.
alt_values = df['ALT after 24 w']
Q1, Q3 = (np.percentile(alt_values, q) for q in (25, 75))
IQR = Q3 - Q1
alt_low, alt_high = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
df = df[(alt_values >= alt_low) & (alt_values <= alt_high)]

rna_values = df['RNA 12']
Q1, Q3 = (np.percentile(rna_values, q) for q in (25, 75))
IQR = Q3 - Q1
rna_low, rna_high = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
df = df[(rna_values >= rna_low) & (rna_values <= rna_high)]

# Redraw the per-column box plots to inspect the effect of the filtering.
df.plot(kind='box', subplots=True, sharex=False, sharey=False, layout=(10,3), figsize=(15,25))

# Task 2: Update NN weights using Perceptron Rule

### Shuffle and split data

In [None]:
# Shuffle the rows once. The seed keeps the notebook reproducible, in line
# with the fixed random_state used for the two splits below.
shuffled_data = df.sample(frac=1, random_state=0)

# Every column except the last is a feature; the last column is the target.
# Both are taken from the shuffled frame so the shuffle actually takes effect.
X = shuffled_data.iloc[:,:-1].values
Y = shuffled_data.iloc[:,-1].values

# First split: 70% training data, 30% held out.
X_train, m_test, Y_train, my_test = train_test_split(X, Y, test_size = 0.30, random_state = 0)

# Second split of the held-out part: 2/3 validation and 1/3 test,
# i.e. roughly 20% and 10% of the full dataset.
X_validate, X_test, Y_validate, Y_test = train_test_split(m_test, my_test, test_size = 1/3, random_state = 0)


### Build Neural Network

In [None]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of ``x``.

    ``x`` may be a scalar or a NumPy array; arrays are handled element-wise
    because the computation goes through ``np.exp``.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def targetToVector(target):
    """One-hot encode a class label as a list of four 0/1 entries.

    Position ``k`` (0-based) is 1 when ``target == k + 1``; a label outside
    1..4 yields all zeros.
    """
    return [1 if label == target else 0 for label in range(1, 5)]

def fitness(predictedVector, targetVector):
    """Return the sum of absolute differences between target and prediction.

    Only the first ``len(predictedVector)`` entries of ``targetVector`` are
    compared. A value of 0 means the two vectors agree on those entries.
    """
    return sum(
        abs(targetVector[position] - predicted)
        for position, predicted in enumerate(predictedVector)
    )

def randomPopulation(size=500):
    """Create ``size`` random candidate networks.

    Returns ``((W1s, B1s), (W2s, B2s))`` where each list holds ``size`` items:
    W1s are 10 x 28 weight matrices, W2s are 4 x 10 weight matrices (weights
    are integers in [-10, 10]), B1s are bias lists of length 10 and B2s are
    bias lists of length 4 (biases are integers in [-5, 5]).
    """
    def weight_matrix(rows, cols):
        # Rows are filled one after another, left to right.
        return [[random.randint(-10, 10) for _ in range(cols)] for _ in range(rows)]

    def bias_vector(length):
        return [random.randint(-5, 5) for _ in range(length)]

    # Generation order matters for reproducibility under a fixed seed:
    # all W1 matrices, then all W2 matrices, then B1 and finally B2.
    W1s = [weight_matrix(10, 28) for _ in range(size)]
    W2s = [weight_matrix(4, 10) for _ in range(size)]
    B1s = [bias_vector(10) for _ in range(size)]
    B2s = [bias_vector(4) for _ in range(size)]
    return (W1s, B1s), (W2s, B2s)

def crossover(rate, population):
    """Build a new population by single-point crossover of random parent pairs.

    Parents are drawn without replacement. Each pair is replaced by its two
    children; individuals that were never drawn are carried over unchanged.

    Args:
        rate: Controls how many pairs are crossed: ``int(rate * len(population))``
            pairs, capped at ``len(population) // 2`` because every pair uses
            up two individuals. Without the cap any rate above 0.5 ran out of
            parents and raised ``ValueError``.
        population: List of genes. A gene must support ``len``, slicing and
            ``+`` (for example a list).

    Returns:
        A new list with the children first, followed by the untouched
        individuals. Its length equals ``len(population)``; the input list is
        not modified.
    """
    survivors = list(population)
    children = []

    def cross(g1, g2):
        # Cut both parents at the same point and swap their tails.
        cp = random.randrange(0, len(g1))
        return g1[:cp] + g2[cp:], g2[:cp] + g1[cp:]

    pairs = min(int(rate * len(population)), len(population) // 2)
    for _ in range(pairs):
        # pop() by index removes exactly the drawn individual; remove() would
        # compare genes by value and could take out an equal-looking sibling.
        p1 = survivors.pop(random.randrange(len(survivors)))
        p2 = survivors.pop(random.randrange(len(survivors)))
        children.extend(cross(p1, p2))
    return children + survivors

def mutationWeights(rate, weightCount, population):
    """Return a copy of ``population`` in which some genes have mutated weights.

    Each gene is selected for mutation with probability ``rate``; at most
    ``rate * len(population)`` genes are mutated, and scanning stops once that
    budget is used up. A selected gene gets ``weightCount`` random positions
    overwritten with a random integer in [-10, 10].

    Args:
        rate: Mutation probability per gene, also used for the budget above.
        weightCount: Number of weight replacements per mutated gene (the same
            position may be drawn more than once).
        population: List of genes. A gene is either a flat list of weights or
            a list of row lists (a weight matrix); for a matrix a random row
            is picked and one weight inside that row is replaced.

    Returns:
        A new list. Mutated genes are fresh copies, so neither ``population``
        nor the genes it holds are modified.
    """
    newPop = list(population)
    limit = rate * len(population)
    for i, gene in enumerate(population):
        # random.random() is the function; calling the module itself, as the
        # code did before, raises TypeError.
        if random.random() > rate:
            continue
        if limit <= 0:
            break
        limit -= 1

        # Work on a copy (rows included) so the caller's gene stays intact.
        mutated = [row.copy() if isinstance(row, list) else row for row in gene]
        for _ in range(weightCount):
            idx = random.randrange(len(mutated))
            if isinstance(mutated[idx], list):
                # Matrix gene: change one weight inside the row instead of
                # replacing the whole row with a scalar.
                row = mutated[idx]
                row[random.randrange(len(row))] = random.randint(-10, 10)
            else:
                mutated[idx] = random.randint(-10, 10)
        newPop[i] = mutated

    return newPop

def mutationBias(rate,  population):
    """Return a copy of ``population`` in which some genes have a mutated bias.

    Each gene is selected for mutation with probability ``rate``; at most
    ``rate * len(population)`` genes are mutated, and scanning stops once that
    budget is used up.

    Args:
        rate: Mutation probability per gene, also used for the budget above.
        population: List of genes. A gene is either a list of biases, in which
            case one random entry is replaced by a random integer in [-5, 5],
            or a single scalar bias, which is replaced as a whole.

    Returns:
        A new list. Mutated list genes are fresh copies, so neither
        ``population`` nor the genes it holds are modified.
    """
    newPop = list(population)
    limit = rate * len(population)
    for i, gene in enumerate(population):
        # random.random() is the function; calling the module itself, as the
        # code did before, raises TypeError.
        if random.random() > rate:
            continue
        if limit <= 0:
            break
        limit -= 1

        if isinstance(gene, list):
            # Keep the bias vector's shape: change a single entry of a copy
            # rather than replacing the whole vector with one integer.
            mutated = gene.copy()
            if mutated:
                mutated[random.randrange(len(mutated))] = random.randint(-5, 5)
            newPop[i] = mutated
        else:
            newPop[i] = random.randint(-5, 5)

    return newPop
   
def selection(rate, population, fitness):
    """Keep the best-scoring share of the population and duplicate it.

    Args:
        rate: Fraction of the population to keep; ``int(rate * len(population))``
            individuals survive.
        population: List of individuals.
        fitness: Sequence where ``fitness[i]`` is the score of
            ``population[i]``. Lower scores rank first; ties keep their
            original order.

    Returns:
        The survivors in ranking order, followed by the same survivors again
        (so a rate of 0.5 restores the original population size). Both halves
        reference the same objects; copy an individual before changing it in
        place.
    """
    keep = int(rate * len(population))
    ranking = sorted(range(len(population)), key=lambda i: fitness[i])
    selected = [population[i] for i in ranking[:keep]]
    # list.extend() returns None, so the doubled list is built with + instead.
    return selected + selected


In [None]:
# Random starting parameters for a 28 -> 10 -> 4 network.
# Weights are integers in [-10, 10], biases are integers in [-5, 5].
W1 = [[random.randint(-10, 10) for _ in range(28)] for _ in range(10)]  # 10 rows of 28 weights
W2 = [[random.randint(-10, 10) for _ in range(10)] for _ in range(4)]  # 4 rows of 10 weights; to be changed
B1 = [random.randint(-5, 5) for _ in range(10)]  # one bias per row of W1
B2 = [random.randint(-5, 5) for _ in range(4)]  # one bias per row of W2


# Element-wise wrapper around sigmoid, applied to the output of each layer.
sigs = np.vectorize(sigmoid)

exitCondition = False
counter = 0
while (counter < 2000):
    counter += 1
    # Reorder features and labels with one shared permutation. Shuffling
    # X_train on its own would break the row-wise pairing with Y_train.
    order = np.random.permutation(len(X_train))
    X_train = X_train[order]
    Y_train = Y_train[order]

    # Split the epoch into full batches of 69 samples. The number of batches
    # is derived from the data, so a training set with fewer than 14 * 69 rows
    # no longer raises IndexError. Samples that do not fill a whole batch are
    # skipped for this epoch.
    batch_size = 69
    batches = [[X_train[first + i] for i in range(batch_size)]
               for first in range(0, len(X_train) - batch_size + 1, batch_size)]

    number_of_batches = len(batches)

    for batch in batches:
        results = []

        for dataPoint in batch:
            # Hidden layer: weighted sum plus bias, then sigmoid.
            midResult = np.dot(W1, dataPoint)
            actv_midResult = sigs(midResult + B1)

            # Output layer: same computation on the hidden activations.
            finalResult = np.dot(W2, actv_midResult)
            actv_finalResult = sigs(finalResult + B2)
            # NOTE(review): this prints one line per sample in every epoch;
            # consider removing it once debugging is done.
            print(actv_finalResult)
            results.append(actv_finalResult)

           


    
        

        # code for updating weights and biases

    # code for validation
 
 # code for testing


           



