In [26]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt



In [27]:
df=pd.read_csv('dataset22.csv')

In [28]:
df

Unnamed: 0,EnB Number,Throughput,Delay,Jitter,Packet Loss,Connected UE,5G UE,ResourceFactor
0,0,150.926,2.08583,0.607,0,27,6,0.555556
1,1,132.379,0.494901,0.023477,0,38,12,0.447368
2,2,113.252,0.311122,0.019222,2,46,11,0.456522
3,3,203.722,0.005941,0.002383,0,29,9,0.655172
4,4,130.993,1.69148,0.4886,0,40,11,0.55
5,5,166.41,0.024811,0.009208,0,39,17,0.589744
6,6,107.76,0.026142,0.010017,0,60,11,0.5
7,7,133.737,0.84171,0.066875,0,41,13,0.390244
8,8,198.419,0.768685,0.66425,0,25,8,0.6
9,9,179.309,0.585281,0.171261,0,24,8,0.541667


In [29]:
dfaug=df.copy()

In [30]:
X=dfaug.drop(['EnB Number'],axis=1)

In [31]:
column_stats = X.describe().loc[['mean', 'std']]
augmented_data = pd.concat([dfaug]*8, ignore_index=True)  # Duplicate the dataset 5 times


In [32]:
augmented_data.drop(['EnB Number'],axis=1,inplace=True)

In [33]:
for column in column_stats:  # Iterate over columns in column_stats
    mean = column_stats[column]['mean']
    std = column_stats[column]['std']
    noise=np.random.normal(0,std*0.00001,len(augmented_data))
    if column == 'Throughput':
        # noise = np.random.uniform(low=np.min(X[column]), high=np.max(X[column]), size=len(augmented_data))
        augmented_data[column] = noise+augmented_data[column]

    elif column == 'Delay':
        # noise = np.random.uniform(low=np.min(X[column]), high=2, size=len(augmented_data))
        # noise = np.random.uniform(low=np.min(X[column]), high=np.max(X[column]), size=len(augmented_data))
        augmented_data[column] = noise + augmented_data[column]
        
    elif column == 'Jitter':
        # noise = np.random.uniform(low=np.min(X[column]), high=1, size=len(augmented_data))
        noise = np.random.uniform(low=np.min(X[column]), high=np.max(X[column]), size=len(augmented_data))
        augmented_data[column] = noise + augmented_data[column]
        
    elif column == 'Packet Loss':
        # noise = np.random.uniform(low=0, high=3, size=len(augmented_data))
        noise = np.random.uniform(low=np.min(X[column]), high=3, size=len(augmented_data))
        augmented_data[column] = np.floor(noise).astype(int)
    
    elif column == 'Connected UE':
        # noise = np.random.uniform(low=30, high=130, size=len(augmented_data))
        # noise = np.random.normal(0, std*0.1, size=len(augmented_data))
        # augmented_data[column] = np.ceil(noise).astype(int)+augmented_data[column]
        pass
        
    
    elif column == '5G UE':
        # noise = np.random.uniform(low=8, high=30, size=len(augmented_data))
        # noise = np.random.uniform(low=-5, high=+5, size=len(augmented_data))
        # pass
        # augmented_data[column] = np.ceil(noise).astype(int)+augmented_data[column]
        pass
    
    elif column == 'ResourceFactor':
        # noise = np.random.uniform(low=0.4, high=0.70, size=len(augmented_data))
        # noise = np.random.uniform(low=np.min(X[column]), high=np.max(X[column]), size=len(augmented_data))
        augmented_data[column] = noise + augmented_data[column]
    
    else:
        print("No match found")
        break


In [34]:
X = pd.concat([X, augmented_data], ignore_index=True)


In [35]:
X

Unnamed: 0,Throughput,Delay,Jitter,Packet Loss,Connected UE,5G UE,ResourceFactor
0,150.926000,2.085830,0.607000,0,27,6,0.555556
1,132.379000,0.494901,0.023477,0,38,12,0.447368
2,113.252000,0.311122,0.019222,2,46,11,0.456522
3,203.722000,0.005941,0.002383,0,29,9,0.655172
4,130.993000,1.691480,0.488600,0,40,11,0.550000
...,...,...,...,...,...,...,...
193,87.222926,0.049971,0.208397,1,62,24,0.435484
194,141.823455,0.823337,0.535840,2,41,6,0.512196
195,98.643043,1.844330,0.477607,2,58,18,0.672415
196,98.614712,1.264490,0.277957,2,48,16,0.416668


In [36]:
feature_weights = {
    'Throughput': 0.2,
    'Delay': 0.1,
    'Jitter': 0.1,
    'Packet Loss': 0.1,
    'Connected UE':0.1,
    '5G UE':0.2,
    'ResourceFactor':0.2
}

In [37]:
weighted_X_scaled = pd.DataFrame()

for column, weight in feature_weights.items():
    weighted_X_scaled[column + '_Weighted'] = X[column] * weight

In [38]:
weighted_X_scaled

Unnamed: 0,Throughput_Weighted,Delay_Weighted,Jitter_Weighted,Packet Loss_Weighted,Connected UE_Weighted,5G UE_Weighted,ResourceFactor_Weighted
0,30.185200,0.208583,0.060700,0.0,2.7,1.2,0.111111
1,26.475800,0.049490,0.002348,0.0,3.8,2.4,0.089474
2,22.650400,0.031112,0.001922,0.2,4.6,2.2,0.091304
3,40.744400,0.000594,0.000238,0.0,2.9,1.8,0.131034
4,26.198600,0.169148,0.048860,0.0,4.0,2.2,0.110000
...,...,...,...,...,...,...,...
193,17.444585,0.004997,0.020840,0.1,6.2,4.8,0.087097
194,28.364691,0.082334,0.053584,0.2,4.1,1.2,0.102439
195,19.728609,0.184433,0.047761,0.2,5.8,3.6,0.134483
196,19.722942,0.126449,0.027796,0.2,4.8,3.2,0.083334


In [39]:

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(weighted_X_scaled)
X_scaled_df = pd.DataFrame(X_scaled, columns=weighted_X_scaled.columns)
X_scaled_df


Unnamed: 0,Throughput_Weighted,Delay_Weighted,Jitter_Weighted,Packet Loss_Weighted,Connected UE_Weighted,5G UE_Weighted,ResourceFactor_Weighted
0,0.571574,0.999994,0.488594,0.0,0.112360,0.00,0.585859
1,0.440247,0.235092,0.017046,0.0,0.235955,0.24,0.202447
2,0.304812,0.146733,0.013608,1.0,0.325843,0.20,0.234888
3,0.945412,0.000005,0.000000,0.0,0.134831,0.12,0.938892
4,0.430433,0.810395,0.392914,0.0,0.258427,0.20,0.566169
...,...,...,...,...,...,...,...
193,0.120505,0.021175,0.166481,0.5,0.505618,0.72,0.160331
194,0.507121,0.393001,0.431089,1.0,0.269663,0.00,0.432192
195,0.201369,0.883883,0.384031,1.0,0.460674,0.48,1.000000
196,0.201168,0.605103,0.222693,1.0,0.348315,0.40,0.093648


In [40]:
label=np.array(X_scaled_df.sum(axis=1))
label=label.reshape(-1,1)

In [41]:
scaler=MinMaxScaler()

Y=scaler.fit_transform(label)
print(np.median(Y))


0.3486836743199421


In [42]:
Y=(Y>=np.median(Y)).astype(int)

In [43]:
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=10,stratify=Y)


In [44]:
classifier = RandomForestClassifier()
classifier.fit(X_train, Y_train)
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy:", accuracy)

  return fit_method(estimator, *args, **kwargs)


Accuracy: 0.95


In [45]:
count1=0
for val in Y:
    if val==1:
        count1+=1

count0=len(Y)-count1
print(count0)
print(count1)

99
99


In [46]:
precision = precision_score(Y_test, Y_pred)

# Calculate recall
recall = recall_score(Y_test, Y_pred)

# Calculate F1-score
f1 = f1_score(Y_test, Y_pred)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 1.0
Recall: 0.9
F1-score: 0.9473684210526316


In [47]:
rf=RandomForestClassifier()
rf.fit(X_train,Y_train)
Y_prob=rf.predict_proba(X_test)
print(Y_pred)

  return fit_method(estimator, *args, **kwargs)


[0 0 1 1 1 0 0 0 1 0 0 1 0 0 1 0 0 1 1 0 0 0 1 1 1 1 1 0 0 1 0 1 0 1 1 0 1
 0 0 0]


In [48]:
ans=rf.predict_proba(X_scaled)

In [49]:
print(ans[:22])

[[0.04 0.96]
 [0.99 0.01]
 [0.96 0.04]
 [0.82 0.18]
 [0.03 0.97]
 [0.89 0.11]
 [1.   0.  ]
 [1.   0.  ]
 [0.04 0.96]
 [1.   0.  ]
 [0.94 0.06]
 [0.99 0.01]
 [0.97 0.03]
 [1.   0.  ]
 [0.99 0.01]
 [1.   0.  ]
 [0.71 0.29]
 [0.97 0.03]
 [0.98 0.02]
 [0.02 0.98]
 [0.69 0.31]
 [0.   1.  ]]


In [50]:
print(Y[0:22])

[[1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]]
