In [24]:
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import truncnorm

In [68]:
def simulate_numerical_column(p_data, column_name, mean, sd, decimal_places, noise, distribution,low = None, upp = None, N=100):
    n = p_data.shape[0]
    if distribution == "Normal":
        rvs_object = truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)
    elif distribution == "Uniform":
        rvs_object = uniform(loc=mean, scale=sd)
    elif distribution == "Beta":
        rvs_object = beta(a=low, b=upp, loc=mean, scale=sd) 
    elif distribution == "Gamma":
        rvs_object = gamma(a=low, loc=mean, scale=sd)
    elif distribution == "Alpha":
        rvs_object = alpha(a=low, loc=mean, scale=sd)
    else: 
        print("Warning: Distribution is not supported, Normal distribution is the default")
        rvs_object = truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd) 
        
    values = np.zeros((n))
    col_data = []
    for i in range(N):
        x = np.array(rvs_object.rvs(n))
        values += x
        col_data.append(x)
        
    simulated_column_data = values/N 
    col_data.append(simulated_column_data)
    
    if noise > low:
        print("Warning: Noise is too large.")
    if noise == 0:
        noise_array = np.zeros((n))
    else:
        noise_array = np.random.uniform(-noise, noise, (n))
        
    simulated_noised_column = simulated_column_data + noise_array
    sn_rounded = np.round(simulated_noised_column, decimal_places)
    p_data[column_name] = sn_rounded.tolist()
    return p_data, col_data 

def simulate_categorical_column(p_data, column_name, class_given):
    """Set ``column_name`` of ``p_data`` to ``class_given`` on every row.

    The frame is modified in place and the same object is returned.
    """
    row_count = p_data.shape[0]
    # One copy of the label per existing row.
    p_data[column_name] = [class_given for _ in range(row_count)]
    return p_data




In [71]:
# Load the user table from users.csv (relative path, resolved against the working directory).
data = pd.read_csv("users.csv")
# Bare last expression: the notebook renders the DataFrame as this cell's output.
data

Unnamed: 0,id,name,screen_name,statuses_count,followers_count,friends_count,favourites_count,listed_count,created_at,url,...,profile_sidebar_fill_color,profile_background_image_url,profile_background_color,profile_link_color,utc_offset,protected,verified,description,updated,dataset
0,3610511,Davide Dellacasa,braddd,20370,5470,2385,145,52,Fri Apr 06 10:58:22 +0000 2007,http://braddd.tumblr.com,...,FFF7CC,http://a0.twimg.com/profile_background_images/...,BADFCD,FF0000,3600.0,,,Founder of http://www.screenweek.it & http://w...,2015-02-14 10:54:49,E13
1,5656162,Simone Economo,eKoeS,3131,506,381,9,40,Mon Apr 30 15:08:42 +0000 2007,http://www.lineheight.net/,...,DDEEF6,http://a0.twimg.com/images/themes/theme1/bg.png,C0DEED,0084B4,3600.0,,,BSc degree (cum laude) in Computer Engineering...,2015-02-14 10:54:49,E13
2,5682702,tacone,tacone_,4024,264,87,323,16,Tue May 01 11:53:40 +0000 2007,http://t.co/LKrl1dZE,...,000000,http://a0.twimg.com/profile_background_images/...,1A1B1F,2FC2EF,3600.0,,,Cogito ergo bestemmio.,2015-02-14 10:54:49,E13
3,6067292,alesaura,alesstar,40586,640,622,1118,32,Tue May 15 16:55:16 +0000 2007,http://alesstar.wordpress.com/,...,95E8EC,http://a0.twimg.com/images/themes/theme4/bg.gif,0099B9,0099B9,3600.0,,,"Se la vita ti dà sarde, scapocciale!",2015-02-14 10:54:49,E13
4,6015122,Angelo,PerDiletto,2016,62,64,13,0,Sun May 13 19:52:00 +0000 2007,http://www.flickr.com/per_diletto,...,F6F6F6,http://a0.twimg.com/images/themes/theme18/bg.gif,ACDED6,038543,3600.0,,,Je me souviens,2015-02-14 10:54:49,E13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1476,1127280169,Frilaif Official,frilaif,49,208,124,14,2,Mon Jan 28 07:13:47 +0000 2013,http://t.co/IkqfPF32pr,...,DDEEF6,http://a0.twimg.com/images/themes/theme1/bg.png,C0DEED,0084B4,,,,Sex and Erotic Advisor. Sexual health is a sta...,2015-02-14 10:54:49,E13
1477,1156344000,♔♥HeartsQueen♥♔,Stronzetta__,66,22,55,0,0,Thu Feb 07 07:27:08 +0000 2013,,...,7AC3EE,http://a0.twimg.com/images/themes/theme10/bg.gif,642D8B,740CC4,,,,Se avessi seguito tutte le regole non sarei ma...,2015-02-14 10:54:49,E13
1478,1169114810,angelagervasi,angelagervasi2,4,0,4,3,0,Mon Feb 11 14:50:19 +0000 2013,,...,DDEEF6,http://a0.twimg.com/images/themes/theme1/bg.png,C0DEED,0084B4,,,,"mamma e giovane nonna,laureata,amante del teat...",2015-02-14 10:54:49,E13
1479,1212975186,Midnight,Movie1O,3,2,4,0,0,Sat Feb 23 17:46:55 +0000 2013,,...,F6FFD1,http://a0.twimg.com/profile_background_images/...,FFFFFF,000000,,,,"If you don't shut your fucking mouth, I will k...",2015-02-14 10:54:49,E13


In [72]:
# "Power Normal" is not one of the names simulate_numerical_column checks for
# ("Normal", "Uniform", "Beta", "Gamma", "Alpha"), so this call takes the fallback
# branch: it prints a warning and samples from the normal truncated to [low, upp].
# The existing "favourites_count" column of `data` is overwritten in place.
simulate_numerical_column(data, "favourites_count", mean = 500, sd = 1, decimal_places = 2, noise = 0.1, distribution = "Power Normal",low = 100, upp = 1000)




(              id              name     screen_name  statuses_count  \
 0        3610511  Davide Dellacasa          braddd           20370   
 1        5656162    Simone Economo           eKoeS            3131   
 2        5682702            tacone         tacone_            4024   
 3        6067292          alesaura        alesstar           40586   
 4        6015122            Angelo      PerDiletto            2016   
 ...          ...               ...             ...             ...   
 1476  1127280169  Frilaif Official         frilaif              49   
 1477  1156344000   ♔♥HeartsQueen♥♔    Stronzetta__              66   
 1478  1169114810     angelagervasi  angelagervasi2               4   
 1479  1212975186          Midnight         Movie1O               3   
 1480  1213937306               ʸᵒᵘ  xjawaadscookie            2568   
 
       followers_count  friends_count  favourites_count  listed_count  \
 0                5470           2385            499.96            52   
