# Introduce Noise in the Data

In this notebook, we add Gaussian and impulse noise to the datasets.

## Load Required Packages

In [14]:
import pandas as pd
import numpy as np

## Add Gaussian Noise

### Iris Dataset

In [15]:
# Load the clean Iris data; the first CSV column is used as the row index.
df = pd.read_csv("../datasets/clean/iris.csv", index_col=0)

In [16]:
# Split the frame column-wise: everything except "target" is a feature,
# and the "target" column is kept as a one-column DataFrame.
is_target = df.columns == "target"
X = df.loc[:, ~is_target]
y = df.loc[:, is_target]

In [17]:
# Zero-mean Gaussian noise, one independent draw per feature value.
# A dedicated, seeded generator is used so that re-running the notebook
# regenerates exactly the same noisy Iris data.
NOISE_SEED = 42
std_dev = 0.6
mean = 0
rng = np.random.default_rng(NOISE_SEED)
noise = rng.normal(mean, std_dev, X.shape)

In [18]:
# Element-wise addition: `noise` was generated with X's shape, so every
# feature value receives its own perturbation.
noise_X = X + noise

In [19]:
# Re-attach the untouched target column to the noisy features by row index.
noisy_df = pd.merge(
    noise_X,
    y,
    how="inner",
    left_index=True,
    right_index=True,
)

In [20]:
# Preview the first five rows of the noisy Iris frame.
noisy_df.head(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.900994,4.366613,1.625918,0.593806,0
1,5.083111,2.671822,2.033391,1.412952,0
2,5.031355,2.254042,1.763903,-0.348187,0
3,4.834744,3.261594,3.173536,0.140325,0
4,5.021229,2.524706,0.968853,0.670659,0


In [21]:
# Persist the noisy Iris frame (row index included) in the noisy dataset folder.
iris_noisy_path = "../datasets/noisy/iris_noisy.csv"
noisy_df.to_csv(iris_noisy_path)

### IMDB Dataset

In [22]:
# Load the clean IMDB reviews. Note that this rebinds `df`, which held the
# Iris frame in the previous section.
df = pd.read_csv("../datasets/clean/imdb.csv")

In [23]:
def add_gaussian_noise(text, mean=0, std_dev=0.1):
    """
    Add Gaussian noise to text by shifting character code points.

    One sample is drawn from a normal distribution per character, truncated
    toward zero with ``int`` and added to that character's Unicode code
    point. Samples with magnitude below 1 truncate to 0, so those characters
    are left as they are.

    Parameters:
        text (str): The input text to which noise will be added.
        mean (float): Mean of the Gaussian distribution.
        std_dev (float): Standard deviation of the Gaussian distribution.

    Returns:
        str: The text with added Gaussian noise, same length as ``text``.
        A character whose shifted code point would be invalid (negative,
        above U+10FFFF, or inside the surrogate range) is kept unchanged.
    """
    chars = list(text)

    # One draw per character, generated in a single call.
    offsets = np.random.normal(mean, std_dev, len(chars))

    noisy_chars = []
    for ch, offset in zip(chars, offsets):
        shifted = ord(ch) + int(offset)
        # chr() raises ValueError outside [0, 0x10FFFF], and a lone surrogate
        # (U+D800..U+DFFF) cannot be encoded as UTF-8 when the result is
        # written to disk, so fall back to the original character.
        in_range = 0 <= shifted <= 0x10FFFF
        is_surrogate = 0xD800 <= shifted <= 0xDFFF
        if in_range and not is_surrogate:
            noisy_chars.append(chr(shifted))
        else:
            noisy_chars.append(ch)

    return ''.join(noisy_chars)


In [24]:
# Work on a copy so the clean frame stays available for comparison, then
# replace each review with its noisy counterpart (sentiment is untouched).
noisy_df = df.copy()
noisy_reviews = noisy_df['review'].apply(add_gaussian_noise, mean=0, std_dev=0.6)
noisy_df['review'] = noisy_reviews

In [27]:
# Show the first five clean reviews for comparison with the noisy ones.
df.head(5)

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [25]:
# Preview the first five noisy reviews.
noisy_df.head(5)

Unnamed: 0,review,sentiment
0,Pne of the other reviewdrs has mentipoed uhat ...,positive
1,A wondergullittme procubsion. <br /><br/>The...,positive
2,I thought tgis was a wpnderftl way to spend!ti...,positive
3,Bbsically there's a family whfre a little boy ...,negative
4,"Petter M`ttei'r !Lovein the Time of Money"" is...",positive


In [26]:
# Persist the noisy IMDB frame in the noisy dataset folder.
# NOTE(review): the clean IMDB file is read without index_col, so writing the
# row index here adds a leading column the clean file may not have; confirm
# that downstream readers expect it.
imdb_noisy_path = "../datasets/noisy/imdb_noisy.csv"
noisy_df.to_csv(imdb_noisy_path)