In [None]:
!pip install scikit-multiflow river pandas

In [3]:
from pathlib import Path

import pandas as pd
import river.datasets.synth as rvr
from skmultiflow.data import *
from skmultiflow.data import (
    AGRAWALGenerator,
    HyperplaneGenerator,
    LEDGenerator,
    LEDGeneratorDrift,
    AnomalySineGenerator,
    ConceptDriftStream,
    RandomRBFGenerator,
    RandomRBFGeneratorDrift,
    RandomTreeGenerator,
    SEAGenerator,
    SineGenerator,
    STAGGERGenerator,
    WaveformGenerator
)

In [4]:

# Factories for the scikit-multiflow streams, keyed by the name used for the output CSV.
# Every value is a zero-argument callable, so a generator is only built when it is selected.
#
# The ConceptDriftStream wrappers are given an explicit random_state so that regenerating
# a dataset yields the same rows; the base generators were already seeded individually.
SKMUL_DRIFT_SEED = 31

skmul_generator_dict = {
    "aggrawal": lambda: AGRAWALGenerator(random_state=31, balance_classes=False),

    "hyperlane": lambda: HyperplaneGenerator(  # Hyperplane
        n_features=5,
        n_drift_features=5,
        random_state=31,
    ),

    "ledgenerator": lambda: ConceptDriftStream(  # LEDGenerator
        stream=LEDGenerator(random_state=31, noise_percentage=0.1),
        drift_stream=LEDGenerator(random_state=384, noise_percentage=0.3),
        position=20000,  # Position of drift
        width=1000,      # Width of transition
        random_state=SKMUL_DRIFT_SEED,
    ),

    "anomalysine": lambda: ConceptDriftStream(  # AnomalySine
        # Both sides share random_state=31; they differ only in the noise level.
        stream=AnomalySineGenerator(
            random_state=31, n_samples=40000, n_anomalies=10000, noise=0.6
        ),
        drift_stream=AnomalySineGenerator(
            random_state=31, n_samples=40000, n_anomalies=10000, noise=0.2
        ),
        position=15000,  # Position of drift
        width=5000,      # Width of transition
        random_state=SKMUL_DRIFT_SEED,
    ),

    "anomalysine_2": lambda: ConceptDriftStream(  # AnomalySine 2
        stream=AnomalySineGenerator(
            random_state=31, n_samples=40000, n_anomalies=5000, noise=0.2
        ),
        drift_stream=AnomalySineGenerator(
            random_state=11, n_samples=40000, n_anomalies=5000, noise=0.6
        ),
        position=10000,  # Position of drift
        width=10000,     # Width of transition
        random_state=SKMUL_DRIFT_SEED,
    ),

    "ledgenerator_default_drift": lambda: LEDGeneratorDrift(  # LEDGeneratorDrift
        random_state=31,
        noise_percentage=0.4,
        has_noise=True,
        n_drift_features=4,
    ),

    "randomRBF_gradual": lambda: ConceptDriftStream(  # RandomRBFGenerator
        stream=RandomRBFGenerator(
            model_random_state=16,
            sample_random_state=64,
            n_classes=3,
            n_features=8,
            n_centroids=45,
        ),
        drift_stream=RandomRBFGenerator(
            model_random_state=45,
            sample_random_state=94,
            n_classes=3,
            n_features=8,
            n_centroids=54,
        ),
        position=15000,  # Position of drift
        width=15000,     # Width of transition
        random_state=SKMUL_DRIFT_SEED,
    ),

    # NOTE(review): change_speed is left at the library default here — verify that the
    # default actually moves the centroids, otherwise this stream has no drift.
    "randomRBF_default_drift": lambda: RandomRBFGeneratorDrift(  # RandomRBF Drift
        model_random_state=21,
        sample_random_state=46,
        n_centroids=82,
        n_features=12,
    ),

    # NOTE(review): every RandomTreeGenerator below sets min_leaf_depth larger than
    # max_tree_depth (75 vs 13, 46 vs 13, 57 vs 31). Presumably leaves can then only
    # appear at the maximum depth; confirm this is intended, in particular for the
    # depth-31 tree in "randomTree_2", which may be very expensive to build.
    "randomTree_default": lambda: RandomTreeGenerator(
        tree_random_state=75,
        sample_random_state=46,
        n_num_features=6,
        n_cat_features=2,
        n_categories_per_cat_feature=3,
        min_leaf_depth=75,
        max_tree_depth=13,
    ),

    "randomTree": lambda: ConceptDriftStream(  # RandomTree
        stream=RandomTreeGenerator(
            tree_random_state=75,
            sample_random_state=46,
            n_num_features=6,
            n_cat_features=2,
            n_categories_per_cat_feature=3,
            min_leaf_depth=75,
            max_tree_depth=13,
        ),
        drift_stream=RandomTreeGenerator(
            tree_random_state=87,
            sample_random_state=56,
            n_num_features=6,
            n_cat_features=2,
            n_categories_per_cat_feature=3,
            min_leaf_depth=46,
            max_tree_depth=13,
        ),
        position=15000,
        width=1000,
        random_state=SKMUL_DRIFT_SEED,
    ),

    "randomTree_2": lambda: ConceptDriftStream(  # RandomTree 2
        stream=RandomTreeGenerator(
            tree_random_state=75,
            sample_random_state=46,
            n_num_features=6,
            n_cat_features=2,
            n_categories_per_cat_feature=3,
            min_leaf_depth=75,
            max_tree_depth=13,
        ),
        drift_stream=RandomTreeGenerator(
            tree_random_state=87,
            sample_random_state=56,
            n_num_features=6,
            n_cat_features=3,
            n_categories_per_cat_feature=2,
            min_leaf_depth=57,
            max_tree_depth=31,
        ),
        position=15000,
        width=10000,
        random_state=SKMUL_DRIFT_SEED,
    ),

    "SEA": lambda: SEAGenerator(  # SEA
        classification_function=3,
        random_state=74,
        balance_classes=True,
        noise_percentage=0.5,
    ),

    "Sine": lambda: SineGenerator(  # Sine
        classification_function=3,
        random_state=76,
        balance_classes=False,
        has_noise=True,
    ),

    "SEA_2": lambda: ConceptDriftStream(  # SEA 2
        stream=SEAGenerator(
            classification_function=3,
            random_state=74,
            balance_classes=True,
            noise_percentage=0.2,
        ),
        drift_stream=SEAGenerator(
            classification_function=2,
            random_state=357,
            balance_classes=True,
            noise_percentage=0.4,
        ),
        position=20000,
        width=10000,
        random_state=SKMUL_DRIFT_SEED,
    ),

    "STAGGER": lambda: ConceptDriftStream(  # STAGGER
        stream=STAGGERGenerator(
            classification_function=2,
            random_state=165,
            balance_classes=False,
        ),
        drift_stream=STAGGERGenerator(
            classification_function=0,
            random_state=78,
            balance_classes=True,
        ),
        position=25000,
        width=5000,
        random_state=SKMUL_DRIFT_SEED,
    ),

    "STAGGER_2": lambda: ConceptDriftStream(  # STAGGER 2
        stream=STAGGERGenerator(
            classification_function=1,
            random_state=15,
            balance_classes=True,
        ),
        drift_stream=STAGGERGenerator(
            classification_function=2,
            random_state=73,
            balance_classes=False,
        ),
        position=15000,
        width=15000,
        random_state=SKMUL_DRIFT_SEED,
    ),

    "Wave": lambda: ConceptDriftStream(  # Waveform
        stream=WaveformGenerator(
            random_state=774,
            has_noise=True,
        ),
        drift_stream=WaveformGenerator(
            random_state=13,
            has_noise=True,
        ),
        position=14000,
        width=7000,
        random_state=SKMUL_DRIFT_SEED,
    ),
}


# Select one factory by name; the same name is reused for the CSV file name.
sk_gen_name = "anomalysine_2"
generator = skmul_generator_dict[sk_gen_name]()

# Request 40000 samples from the generator in a single call.
X, y = generator.next_sample(40000)

# Feature columns are labelled x1..xN by position.
features = pd.DataFrame(X)
features.columns = ["x" + str(position) for position in range(1, features.shape[1] + 1)]

# The labels go into a single column named 'class'.
classes = pd.DataFrame(y)
classes.columns = ["class"]

# Put features and label side by side, one row per sample.
skmul_data = pd.concat([features, classes], axis="columns")

skmul_data

In [72]:
# Deliberate stop: halts "Run All" here so the export cell below cannot silently
# overwrite an existing CSV. Run the export cell by hand when a new file is wanted.
# SystemExit is raised instead of calling exit() so that only this cell is aborted;
# exit() would also shut the kernel down and discard the DataFrame built above.
raise SystemExit("Stopped before the CSV export; run the next cell manually to save.")

In [17]:
# Write the scikit-multiflow dataset to stream_datasets/<generator name>.csv.
# The output folder is created first because to_csv does not create missing directories.
skmul_out_path = Path("stream_datasets") / f"{sk_gen_name}.csv"
skmul_out_path.parent.mkdir(parents=True, exist_ok=True)
skmul_data.to_csv(skmul_out_path, index=False)

In [1]:

# Factories for the river streams, keyed by the name used for the output CSV.
# Every value is a zero-argument callable, so a generator is only built when it is selected.
#
# `rvr` is already bound to the `river.datasets.synth` package, so generators are
# reached as `rvr.<Name>`; `rvr.synth.<Name>` does not exist.
river_generator_dict = {
    "aggrawal": lambda: rvr.Agrawal(classification_function=4, seed=8),  # Agrawal

    # Seeded like every other generator in this dict so the dataset can be regenerated.
    # NOTE(review): n_contextual=10000 is larger than the library's default n_anomalies —
    # presumably n_anomalies should be set as well; verify against river's AnomalySine docs.
    "anomalysine": lambda: rvr.AnomalySine(  # Anomaly sine
        n_samples=50000,
        contextual=True,
        n_contextual=10000,
        seed=31,
    ),

    "hyperlane": lambda: rvr.ConceptDriftStream(  # Hyperplane
        stream=rvr.Hyperplane(seed=15, n_features=8, n_drift_features=4),
        drift_stream=rvr.Hyperplane(seed=71, n_features=8, n_drift_features=8),
        seed=87,
        position=20000,
        width=2000,
    ),

    "hyperlane_2": lambda: rvr.ConceptDriftStream(  # Hyperplane 2
        stream=rvr.Hyperplane(seed=89, n_features=8, n_drift_features=3),
        drift_stream=rvr.Hyperplane(seed=47, n_features=8, n_drift_features=7),
        seed=6,
        position=20000,
        width=20000,
    ),

    "led": lambda: rvr.ConceptDriftStream(  # LED
        stream=rvr.LED(seed=15, noise_percentage=0.2),
        drift_stream=rvr.LED(seed=71, noise_percentage=0.4),
        seed=87,
        position=20000,
        width=2000,
    ),

    "led_default_drift": lambda: rvr.LEDDrift(
        seed=112,
        noise_percentage=0.28,
        irrelevant_features=True,
        n_drift_features=4,
    ),

    "mixed": lambda: rvr.ConceptDriftStream(  # River mixed
        stream=rvr.Mixed(seed=15, classification_function=0, balance_classes=False),
        drift_stream=rvr.Mixed(seed=71, classification_function=1, balance_classes=False),
        seed=87,
        position=20000,
        width=2000,
    ),

    "randomRBF": lambda: rvr.ConceptDriftStream(  # RandomRBF
        stream=rvr.RandomRBF(
            seed_model=15,
            seed_sample=16,
            n_classes=2,
            n_features=9,
            n_centroids=50,
        ),
        drift_stream=rvr.RandomRBF(
            seed_model=71,
            seed_sample=72,
            n_classes=2,
            n_features=9,
            n_centroids=84,
        ),
        seed=78,
        position=25000,
        width=4000,
    ),

    "randomRBF_2": lambda: rvr.ConceptDriftStream(  # RandomRBF 2
        stream=rvr.RandomRBF(
            seed_model=133,
            seed_sample=77,
            n_classes=2,
            n_features=9,
            n_centroids=36,
        ),
        drift_stream=rvr.RandomRBF(
            seed_model=202,
            seed_sample=5,
            n_classes=2,
            n_features=9,
            n_centroids=65,
        ),
        seed=311,
        position=18700,
        width=9200,
    ),

    "randomRBF_drift": lambda: rvr.ConceptDriftStream(  # RandomRBFDrift
        stream=rvr.RandomRBFDrift(
            seed_model=15,
            seed_sample=16,
            n_classes=2,
            n_features=9,
            n_centroids=45,
            change_speed=0.001,
            n_drift_centroids=25,
        ),
        drift_stream=rvr.RandomRBFDrift(
            seed_model=71,
            seed_sample=72,
            n_classes=2,
            n_features=9,
            n_centroids=21,
            change_speed=0.001,
            n_drift_centroids=35,
        ),
        seed=87,
        position=20000,
        width=2000,
    ),

    "randomTree": lambda: rvr.ConceptDriftStream(  # RandomTree
        stream=rvr.RandomTree(
            seed_tree=15,
            seed_sample=16,
            n_classes=2,
            n_num_features=4,
            n_cat_features=5,
            n_categories_per_feature=5,
            max_tree_depth=15,
            first_leaf_level=3,
            fraction_leaves_per_level=0.15,
        ),
        drift_stream=rvr.RandomTree(
            seed_tree=71,
            seed_sample=72,
            n_classes=2,
            n_num_features=4,
            n_cat_features=5,
            n_categories_per_feature=5,
            max_tree_depth=5,
            first_leaf_level=6,
            fraction_leaves_per_level=0.15,
        ),
        seed=87,
        position=30000,
        width=1000,
    ),

    "randomTree_2": lambda: rvr.ConceptDriftStream(  # RandomTree 2
        stream=rvr.RandomTree(
            seed_tree=42,
            seed_sample=99,
            n_classes=2,
            n_num_features=6,
            n_cat_features=3,
            n_categories_per_feature=4,
            max_tree_depth=6,
            first_leaf_level=2,
            fraction_leaves_per_level=0.25,
        ),
        drift_stream=rvr.RandomTree(
            seed_tree=123,
            seed_sample=321,
            n_classes=2,
            n_num_features=6,
            n_cat_features=3,
            n_categories_per_feature=4,
            max_tree_depth=7,
            first_leaf_level=3,
            fraction_leaves_per_level=0.2,
        ),
        seed=202,
        position=12000,
        width=9000,
    ),

    "SEA": lambda: rvr.ConceptDriftStream(  # SEA
        stream=rvr.SEA(variant=0, noise=0.1, seed=100),
        drift_stream=rvr.SEA(variant=2, noise=0.1, seed=200),
        seed=300,
        position=20000,
        width=2000,
    ),

    "SEA_2": lambda: rvr.ConceptDriftStream(  # SEA 2
        stream=rvr.SEA(variant=1, noise=0.1, seed=547),
        drift_stream=rvr.SEA(variant=2, noise=0.3, seed=614),
        seed=300,
        position=15000,
        width=10000,
    ),

    "STAGGER": lambda: rvr.ConceptDriftStream(  # STAGGER
        stream=rvr.STAGGER(
            classification_function=0,
            seed=31,
            balance_classes=True,
        ),
        drift_stream=rvr.STAGGER(
            classification_function=2,
            seed=99,
            balance_classes=False,
        ),
        position=25000,
        width=1000,
        seed=2025,
    ),

    "SINE": lambda: rvr.ConceptDriftStream(  # SINE
        stream=rvr.Sine(
            classification_function=0,
            seed=85,
            balance_classes=False,
            has_noise=True,
        ),
        drift_stream=rvr.Sine(
            classification_function=0,
            seed=72,
            balance_classes=True,
            has_noise=True,
        ),
        position=25000,
        width=800,
        seed=202,
    ),

    "Waveform": lambda: rvr.ConceptDriftStream(  # Waveform
        stream=rvr.Waveform(seed=48, has_noise=True),
        drift_stream=rvr.Waveform(seed=99, has_noise=True),
        position=27000,
        width=1000,
        seed=205,
    ),

    "Waveform_2": lambda: rvr.ConceptDriftStream(  # Waveform 2
        stream=rvr.Waveform(seed=879, has_noise=True),
        drift_stream=rvr.Waveform(seed=569, has_noise=True),
        position=23000,
        width=10000,
        seed=245,
    ),
}



# Select one factory by name; the same name is reused for the CSV file name.
rvr_gen_name = "SEA_2"
generator = river_generator_dict[rvr_gen_name]()

# Materialise the first 40000 items of the stream; each item is unpacked as a pair.
data = list(generator.take(40000))

# First element of every pair -> features, second element -> label.
X = [pair[0] for pair in data]
y = [pair[1] for pair in data]

# Feature columns are relabelled x1..xN by position, whatever their original names.
features = pd.DataFrame(X)
features.columns = ["x" + str(position) for position in range(1, features.shape[1] + 1)]

# The labels go into a single column named 'class'.
classes = pd.DataFrame(y)
classes.columns = ["class"]

# Put features and label side by side, one row per sample.
river_data = pd.concat([features, classes], axis="columns")

river_data

In [None]:
# Deliberate stop: halts "Run All" here so the export cell below cannot silently
# overwrite an existing CSV. Run the export cell by hand when a new file is wanted.
# SystemExit is raised instead of calling exit() so that only this cell is aborted;
# exit() would also shut the kernel down and discard the DataFrame built above.
raise SystemExit("Stopped before the CSV export; run the next cell manually to save.")

In [21]:
# Write the river dataset to stream_datasets/<generator name>.csv.
# The output folder is created first because to_csv does not create missing directories.
river_out_path = Path("stream_datasets") / f"{rvr_gen_name}.csv"
river_out_path.parent.mkdir(parents=True, exist_ok=True)
river_data.to_csv(river_out_path, index=False)