In [36]:
import numpy as np
import pandas as pd

In [37]:
# Seed NumPy's global random generator so that a fresh "Restart & Run All"
# regenerates exactly the same synthetic dataset (every np.random.* call in
# the cells below draws from this global generator).
random_seed = 42
np.random.seed(random_seed)

# Number of samples
n_samples = 10000
anomaly_fraction = 0.05  # 5% anomalous data

# Define possible values for categorical features
slice_types = ['eMBB', 'URLLC', 'mMTC']
device_types = ['smartphone', 'sensor', 'camera', 'drone']

In [38]:
# Create the normal data.
# The three samplers below each draw n_samples values from NumPy's global
# random generator, so every column only has to state its range / value set.
def _uniform(low, high):
    """Return n_samples floats drawn uniformly between low and high."""
    return np.random.uniform(low, high, n_samples)


def _integers(low, high):
    """Return n_samples integers from low (inclusive) up to high (exclusive)."""
    return np.random.randint(low, high, n_samples)


def _pick(options):
    """Return n_samples entries chosen at random (with replacement) from options."""
    return np.random.choice(options, n_samples)


# Columns are generated independently of each other, in the order listed.
# NOTE(review): integer upper bounds are exclusive, so e.g. ChannelQualityIndicator
# spans 1-14 and SpatialStreams spans 1-7 — confirm these ranges are intended.
data = {
    # Network Slicing Features
    'SliceID': _integers(1000, 2000),
    'SliceType': _pick(slice_types),
    'SLA_Latency_ms': _uniform(1, 10),
    'SLA_Bandwidth_Mbps': _uniform(50, 100),
    'CPUAllocation%': _uniform(50, 100),
    'MemoryAllocation%': _uniform(50, 100),
    'Bandwidth_Allocation_Mbps': _uniform(50, 200),
    'TrafficVolume_GB': _uniform(0.1, 5),
    'PacketInterArrival_ms': _uniform(0.1, 5),
    'ServiceType': _pick(['voice', 'video', 'IoT']),

    # Ultra-Low Latency Features
    'EndToEndLatency_ms': _uniform(1, 20),
    'Jitter_ms': _uniform(0.1, 5),
    'RTT_ms': _uniform(1, 20),
    'QoS_Delay_ms': _uniform(1, 10),
    'QoS_Jitter_ms': _uniform(0.1, 5),
    'QoSPacketLoss%': _uniform(0, 1),

    # High Data Throughput Features
    'Throughput_Mbps': _uniform(10, 1000),
    'PacketLossRate%': _uniform(0, 1),
    'RetransmissionRate%': _uniform(0, 1),
    'BandwidthUtilization%': _uniform(50, 100),
    'FlowDirection': _pick(['uplink', 'downlink']),

    # Massive IoT Connectivity Features
    'DeviceType': _pick(device_types),
    'ConnectionDensity_per_km2': _uniform(100, 1000),
    'DataUsage_MB': _uniform(0.1, 100),
    'PacketSize_Bytes': _integers(64, 1500),
    'ConnectionTime_ms': _uniform(10, 500),
    'BatteryLevel%': _uniform(20, 100),
    'SignalStrength_dBm': _uniform(-120, -50),

    # MIMO Features
    'BeamID': _integers(1, 100),
    'BeamAngle_deg': _uniform(0, 360),
    'SignalStrength_dB': _uniform(-120, -50),
    'ChannelQualityIndicator': _integers(1, 15),
    'SNR_dB': _uniform(5, 30),
    'SpatialStreams': _integers(1, 8),
    'UELocation': _uniform(0, 100),  # Simulating coordinates within the cell
}

In [39]:
# Build the tabular dataset from the generated feature columns
df = pd.DataFrame(data)

# Every row starts out labelled as normal (0); anomalous rows are flagged below
df['Anomaly'] = 0

# Draw, without replacement, the row labels that will be turned into anomalies
n_anomalies = int(anomaly_fraction * n_samples)
anomaly_indices = np.random.choice(df.index, size=n_anomalies, replace=False)

# (low, high) bounds used to overwrite each affected column on the anomalous
# rows; the dict order is also the order in which the random draws happen
anomalous_ranges = {
    'EndToEndLatency_ms': (50, 100),
    'PacketLossRate%': (5, 10),
    'RetransmissionRate%': (5, 10),
    'SignalStrength_dBm': (-140, -130),
    'BatteryLevel%': (0, 10),
}
for column, (low, high) in anomalous_ranges.items():
    df.loc[anomaly_indices, column] = np.random.uniform(low, high, len(anomaly_indices))

# Flag the overwritten rows as anomalies (1)
df.loc[anomaly_indices, 'Anomaly'] = 1

In [40]:
# Persist the dataset. index=False keeps the DataFrame's default integer index
# out of the file; otherwise it is written as an unnamed first column that
# comes back as a spurious "Unnamed: 0" feature when the CSV is read again.
df.to_csv("New_dataset2.csv", index=False)
# Display the first few rows of the dataset
print(df.head())

   SliceID SliceType  SLA_Latency_ms  SLA_Bandwidth_Mbps  CPUAllocation%  \
0     1851      mMTC        7.302353           64.813163       67.988920   
1     1037      eMBB        7.249294           72.210178       82.510488   
2     1723      eMBB        5.642935           68.718612       83.551987   
3     1785      mMTC        4.737606           60.762254       90.972633   
4     1903     URLLC        5.869500           70.072329       84.647358   

   MemoryAllocation%  Bandwidth_Allocation_Mbps  TrafficVolume_GB  \
0          82.386098                 111.455872          0.157606   
1          67.431284                  71.848784          4.217008   
2          67.500213                 130.632293          0.387957   
3          70.983929                  70.189836          2.057627   
4          72.780475                 186.906736          3.450233   

   PacketInterArrival_ms ServiceType  ...  BatteryLevel%  SignalStrength_dBm  \
0               3.072438       video  ...      7