# By Kernel density

In [None]:
# In this method, we generate synthetic data using kernel density estimation (KDE).
# For each column of the real data, we fit a Gaussian kernel density estimator using sklearn.neighbors.KernelDensity.
# We then draw synthetic values from the fitted KDE with its sample() method.


import numpy as np
import pandas as pd
from sklearn.neighbors import KernelDensity

# Load the real data
real_data = pd.read_csv('real_data.csv')

# Generate synthetic data using kernel density estimation.
# Each column is modelled and sampled on its own, so relationships between
# columns of the real data are not carried over into the synthetic data.
num_samples = 1000  # Number of synthetic rows to generate
synth_data = pd.DataFrame()

for col in real_data.columns:
    # KernelDensity works on 2-D (n_samples, n_features) input, so the column
    # is reshaped into a single-feature matrix before fitting.
    column_values = real_data[col].values.reshape(-1, 1)
    kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(column_values)

    # sample() returns an array of shape (num_samples, 1). Flatten it to 1-D:
    # assigning a 2-D array as a column of a still-empty DataFrame raises
    # "ValueError: Cannot set a frame with no defined index ...".
    synth_col = kde.sample(num_samples).ravel()

    # Append column to synthetic data
    synth_data[col] = synth_col

# Save synthetic data to CSV file (index=False leaves out the row index)
synth_data.to_csv('synthetic_data.csv', index=False)

# Generating data using the mean and standard deviation of the real data

In [None]:
# In this method, we generate synthetic data by looping over each column and using np.random.normal to draw values from
# a normal distribution with that column's mean and standard deviation.
# Finally, we store each generated column in a new DataFrame called synth_data.


import numpy as np
import pandas as pd
from scipy.stats import norm

# Read the source dataset
real_data = pd.read_csv('real_data.csv')

# Per-column statistics that parameterize the normal distributions
means = real_data.mean()
stds = real_data.std()

# Number of synthetic rows to draw for every column
num_samples = 1000

# Draw each column independently, in the original column order, and collect
# the results by column name before assembling the DataFrame in one step.
generated = {}
for col in real_data.columns:
    mean, std = means[col], stds[col]
    synth_col = np.random.normal(mean, std, num_samples)
    generated[col] = synth_col

synth_data = pd.DataFrame(generated)

# Write the synthetic dataset to disk without the row index
synth_data.to_csv('synthetic_data.csv', index=False)


# By using multivariate normal distribution

In [None]:
# In this method, we generate synthetic data from a multivariate normal distribution
# parameterized by the mean vector and covariance matrix of the real data, sampled with multivariate_normal.rvs from scipy.stats.
# Finally, we build a new DataFrame called synth_data from the generated samples and the column names of the real data.


# Import necessary libraries
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal

# Load the real data
real_data = pd.read_csv('real_data.csv')

# Calculate the mean vector and covariance matrix of the real data
mean = real_data.mean()
cov = real_data.cov()

num_samples = 1000  # Number of synthetic rows to generate

# Draw all columns jointly from one multivariate normal distribution, so the
# covariance between columns of the real data is used when sampling.
samples = multivariate_normal.rvs(mean=mean, cov=cov, size=num_samples)

# rvs() squeezes singleton dimensions out of its result (e.g. a single column
# comes back 1-D), so restore the (num_samples, n_columns) layout explicitly
# before labelling the columns.
samples = np.asarray(samples).reshape(num_samples, len(real_data.columns))
synth_data = pd.DataFrame(samples, columns=real_data.columns)

# Save synthetic data to CSV file (index=False leaves out the row index)
synth_data.to_csv('synthetic_data.csv', index=False)

# index=False excludes the DataFrame's row index from the CSV output.