# Introduce Gaussian Noise in the Data

In this notebook, we add Gaussian and impulse noise to the clean datasets and save the results to the noisy dataset folder.

## Load Required Packages

In [48]:
import pandas as pd
import numpy as np
import random

## Add Gaussian Noise

### Iris Dataset

In [49]:
# Load the clean Iris data; the first CSV column is used as the row index.
df = pd.read_csv("../datasets/clean/iris.csv", index_col=0)

In [50]:
# Split the frame column-wise: every column except "target" is a feature,
# and the "target" column is kept as a one-column DataFrame so it can be merged back later.
X = df.loc[:, ~df.columns.isin(["target"])]
y = df.loc[:, df.columns.isin(["target"])]

In [51]:
# Parameters of the Gaussian noise added to every feature value.
std_dev = 0.1
mean = 0

# Draw one noise value per feature cell from a seeded, local generator so that
# re-running the notebook reproduces exactly the same noisy dataset.
noise = np.random.default_rng(42).normal(mean, std_dev, X.shape)

In [52]:
# Element-wise addition: `noise` has the same shape as X, so each feature cell gets its own offset.
noise_X = X.add(noise)

In [53]:
# Re-attach the untouched target column to the noisy features, aligning rows on the index.
noisy_df = noise_X.merge(y, left_index=True, right_index=True)

In [54]:
# Inspect the result: the feature columns carry the added noise, while `target` is merged back unchanged.
noisy_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.172166,3.426976,1.260269,0.104169,0
1,4.890659,2.873418,1.358275,0.227448,0
2,4.622040,3.421973,1.352043,0.197223,0
3,4.466489,3.093766,1.503757,0.150277,0
4,4.850348,3.694833,1.546262,0.186400,0
...,...,...,...,...,...
145,6.664541,3.110876,5.313867,2.386275,2
146,6.273020,2.280747,5.212568,1.880044,2
147,6.482547,2.912663,5.285851,1.947164,2
148,6.189971,3.279226,5.455456,2.309687,2


In [56]:
# Save the noisy Iris data into the noisy dataset folder.
# NOTE(review): assumes ../datasets/noisy/ already exists - to_csv does not create directories; confirm.
noisy_df.to_csv("../datasets/noisy/iris_noisy.csv")

### IMDB Dataset

In [57]:
# Load the clean IMDB reviews. This rebinds `df`, replacing the Iris frame loaded earlier.
# NOTE(review): unlike the Iris load, no index_col=0 is passed here; if imdb.csv was written
# with its index, that index comes back as an extra unnamed column - confirm.
df = pd.read_csv("../datasets/clean/imdb.csv")

In [58]:
# Preview the clean IMDB frame before any noise is added.
df

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [59]:
def add_gaussian_noise(text, mean=0, std_dev=0.1, rng=None):
    """
    Add character-level Gaussian noise to text.

    One offset per character is drawn from a normal distribution, truncated
    toward zero to an integer, and added to the character's Unicode code
    point. Because of the truncation, offsets with magnitude below 1 leave the
    character unchanged, so a small std_dev alters very few characters.

    If a shifted code point would be invalid (outside 0..0x10FFFF) or would
    land in the surrogate range (which cannot be encoded as UTF-8 when the
    text is written to disk), the original character is kept instead.

    Parameters:
        text (str): The input text to which noise will be added.
        mean (float): Mean of the Gaussian distribution.
        std_dev (float): Standard deviation of the Gaussian distribution.
        rng (np.random.Generator or np.random.RandomState, optional): Source
            of randomness, for reproducible output. Defaults to NumPy's
            global random state.

    Returns:
        str: The text with added Gaussian noise, the same length as the input.
    """
    # Fall back to the global NumPy state so existing callers (and any
    # np.random.seed call they rely on) behave exactly as before.
    sampler = np.random if rng is None else rng

    # Generate Gaussian noise with the same length as the text
    offsets = sampler.normal(mean, std_dev, len(text))

    noisy_chars = []
    for char, offset in zip(text, offsets):
        shifted = ord(char) + int(offset)  # int() truncates toward zero
        in_range = 0 <= shifted <= 0x10FFFF
        is_surrogate = 0xD800 <= shifted <= 0xDFFF
        # chr() raises ValueError outside the Unicode range; surrogates break UTF-8 export.
        noisy_chars.append(chr(shifted) if in_range and not is_surrogate else char)

    return ''.join(noisy_chars)


In [75]:
# Seed NumPy's global random state, which add_gaussian_noise draws from by default,
# so that the noisy reviews are identical on every run of the notebook.
np.random.seed(42)

# Work on a copy so the clean frame stays untouched.
noisy_df = df.copy()

# With std_dev=0.5, only draws of magnitude >= 1 survive the integer truncation inside
# add_gaussian_noise, so most characters of each review are left unchanged.
noisy_df['review'] = noisy_df['review'].apply(add_gaussian_noise, mean=0, std_dev=0.5)

In [76]:
# Save the noisy IMDB data into the noisy dataset folder.
# NOTE(review): assumes ../datasets/noisy/ already exists - to_csv does not create directories; confirm.
noisy_df.to_csv("../datasets/noisy/imdb_noisy.csv")