# AIM - To create a ML model to identify FM stations

Python version used -> python 3

# Step - 1
## Run RTL_power command 

Install GQRX software then connect RTL_SDR dongle and open terminal.

Note : Following command only works for Linux and Mac OS.

COMMAND -> rtl_power -f min:max:bin -g gain -i interval -e runtime filename.ext
where
min is initial frequency
max is terminal frequency
bin is frequency interval
interval in seconds

COMMAND I USED - 

rtl_power -f 87M:108M:1k -g 20 -i 10 -e 5m logfile.csv

All the data is stored in a csv file logfile.csv.

# Step - 2
## Data cleaning 

We will now convert obtained csv into a desireable pandas dataframe

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dfs = pd.read_csv("logfile.csv", header=None)

In [2]:
dfs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4093,4094,4095,4096,4097,4098,4099,4100,4101,4102
0,2018-03-30,22:31:10,87000000,89625000,640.87,40,-45.44,-50.61,-52.59,-52.59,...,-53.91,-54.33,-53.71,-54.75,-53.57,-54.38,-53.16,-52.52,-48.98,-48.98
1,2018-03-30,22:31:10,89625000,92250000,640.87,40,-55.9,-57.27,-57.36,-56.05,...,-56.5,-57.31,-58.34,-56.52,-56.05,-57.12,-57.2,-56.05,-57.33,-57.33
2,2018-03-30,22:31:10,92250000,94875000,640.87,40,-40.56,-41.09,-40.24,-41.16,...,-41.91,-41.46,-40.78,-43.21,-42.09,-42.71,-41.34,-40.86,-42.04,-42.04
3,2018-03-30,22:31:10,94875000,97500000,640.87,40,-41.38,-40.05,-39.69,-40.9,...,-44.02,-41.72,-43.54,-40.14,-42.24,-43.46,-41.24,-42.33,-41.59,-41.59
4,2018-03-30,22:31:10,97500000,100125000,640.87,40,-52.95,-55.07,-54.32,-56.54,...,-52.79,-52.11,-53.67,-53.33,-53.83,-52.23,-52.53,-54.32,-54.27,-54.27


In [3]:
def conversion_function(df):    
    x=[] # Stores all the frequencies
    y=[] # Stores corresponding power value
    z=[] # Stores is_FM, if 1 then yes, if 0 then no

    # following array contains all BAY AREA FM STATIONS
    arr = [87.9, 88.1, 88.5, 89.1, 89.3, 89.5, 89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7]

    #following code makes 3 lists, x-> freq,y-> power, z-> Is_FM?
    for j in range(0,len(df)):
        for i in range(6,4103):
            y.append(float(df[i][j]))
            r = (df[3][j]-df[2][j])/4096
            temp = df[3][j]+(r*(i-6))
            x.append(temp)
            check = round(temp/100000)
            check = int(check)
            check = float(check/10)
            n=0
            if(check in arr):
                n=1
            z.append(int(n))
    df = pd.DataFrame({"Frequency":x,"Power":y,"Is_FM":z})
    return df

In [4]:
%%time
dfs = conversion_function(dfs)

Wall time: 1min 52s


In [5]:
dfs.head()

Unnamed: 0,Frequency,Is_FM,Power
0,89625000.0,0,-45.44
1,89625640.0,0,-50.61
2,89626280.0,0,-52.59
3,89626920.0,0,-52.59
4,89627560.0,0,-53.53


# Step 3 - 
## Train all the classification models to find the best one

Note -  For training, frequency range is from 87 MHz to 108 MHz

In [6]:
%%time
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Perceptron
import xgboost as xgb

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

Wall time: 702 ms




In [7]:
X = dfs.drop('Is_FM', axis=1)
y = dfs['Is_FM']

In [8]:
%%time

# training 4 different classification models

regression_model = LogisticRegression()
regression_model.fit(X, y)

y_predict = regression_model.predict(X)
print("Using Logistic regression -\nTraining accuracy :",accuracy_score(y,y_predict)*100,"%")

knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X, y)

y_predict = knn.predict(X)
print("Using KNN -\nTraining accuracy :",accuracy_score(y,y_predict)*100,"%")

perceptron = Perceptron()
warnings.filterwarnings("ignore")
perceptron.fit(X, y)

y_predict = perceptron.predict(X)
print("Using Perceptron -\nTraining accuracy :",accuracy_score(y,y_predict)*100,"%")

gradboost = xgb.XGBClassifier(n_estimators=1000)
gradboost.fit(X, y)

y_predict = gradboost.predict(X)
print("Using XGBoost -\nTraining accuracy :",accuracy_score(y,y_predict)*100,"%")

Using Logistic regression -
Training accuracy : 80.9555772516 %
Using KNN -
Training accuracy : 100.0 %
Using Perceptron -
Training accuracy : 80.9555772516 %
Using XGBoost -
Training accuracy : 100.0 %
Wall time: 5min 57s


# Step - 4
## Testing with test data in frequency range 87MHz to 108MHz

Command used to generate file -

rtl_power -f 87M:108M:1k -g 20 -i 10 -e 5m lgfile.csv

Generating list of fm stations recognised -

In [62]:
dfs = pd.read_csv("logfile2.csv", header=None)
dfs1 = pd.read_csv("lgfile.csv", header=None)

In [63]:
dfs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4093,4094,4095,4096,4097,4098,4099,4100,4101,4102
0,2018-04-26,21:50:21,87000000,89625000,640.87,36,-57.61,-59.93,-59.54,-58.86,...,-57.8,-59.3,-58.23,-57.58,-58.86,-58.82,-59.11,-57.99,-59.04,-59.04
1,2018-04-26,21:50:21,89625000,92250000,640.87,36,-57.94,-58.36,-60.12,-59.15,...,-58.79,-58.45,-59.0,-59.22,-58.2,-57.66,-58.48,-58.72,-59.71,-59.71
2,2018-04-26,21:50:21,92250000,94875000,640.87,36,-56.83,-59.26,-57.45,-58.75,...,-58.58,-59.26,-57.25,-57.69,-59.58,-59.07,-59.11,-57.8,-57.69,-57.69
3,2018-04-26,21:50:21,94875000,97500000,640.87,36,-57.55,-59.46,-57.96,-57.82,...,-57.23,-59.07,-58.08,-58.36,-58.89,-57.42,-58.52,-58.89,-57.63,-57.63
4,2018-04-26,21:50:21,97500000,100125000,640.87,36,-57.04,-59.0,-58.26,-58.11,...,-59.46,-59.38,-58.55,-58.2,-57.45,-59.89,-57.4,-58.89,-58.17,-58.17


In [64]:
dfs1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4093,4094,4095,4096,4097,4098,4099,4100,4101,4102
0,2018-04-15,16:06:13,87000000,89625000,640.87,38,-45.84,-55.77,-56.08,-57.61,...,-56.32,-55.66,-56.39,-56.26,-54.67,-55.76,-57.07,-56.3,-51.56,-51.56
1,2018-04-15,16:06:13,89625000,92250000,640.87,38,-55.28,-57.92,-56.26,-55.92,...,-56.83,-56.15,-56.35,-57.09,-57.44,-56.12,-57.32,-56.32,-56.71,-56.71
2,2018-04-15,16:06:13,92250000,94875000,640.87,38,-37.9,-37.65,-38.25,-39.42,...,-37.16,-37.5,-38.46,-37.63,-37.81,-39.55,-38.36,-35.82,-37.16,-37.16
3,2018-04-15,16:06:13,94875000,97500000,640.87,38,-35.69,-39.1,-37.89,-38.95,...,-34.19,-33.4,-35.91,-35.23,-33.25,-33.32,-34.95,-35.3,-36.07,-36.07
4,2018-04-15,16:06:13,97500000,100125000,640.87,38,-53.26,-52.24,-52.38,-53.54,...,-48.42,-49.07,-49.04,-48.56,-49.26,-49.03,-48.11,-50.35,-52.49,-52.49


In [65]:
def conv_func(df):
    x=[] # Stores all the frequencies
    y=[] # Stores corresponding power value
    
    for j in range(0,len(df)):
        for i in range(6,4103):
            y.append(float(df[i][j]))
            r = (df[3][j]-df[2][j])/4096
            temp = df[3][j]+(r*(i-6))
            x.append(temp)
    df = pd.DataFrame({"Frequency":x,"Power":y})
    return df

In [66]:
%%time
dfs = conv_func(dfs)
dfs1 = conv_func(dfs1)

Wall time: 2min 9s


In [67]:
dfs.head()

Unnamed: 0,Frequency,Power
0,89625000.0,-57.61
1,89625640.0,-59.93
2,89626280.0,-59.54
3,89626920.0,-58.86
4,89627560.0,-59.34


In [68]:
dfs1.head()

Unnamed: 0,Frequency,Power
0,89625000.0,-45.84
1,89625640.0,-55.77
2,89626280.0,-56.08
3,89626920.0,-57.61
4,89627560.0,-54.9


In [69]:
%%time
y_predict1 = gradboost.predict(dfs)
y_predict3 = knn.predict(dfs)

Wall time: 3.6 s


In [70]:
y_predict11 = gradboost.predict(dfs1)
y_predict33 = knn.predict(dfs1)

In [71]:
%%time
l1=[]
l3=[]
for i in range(0,len(y_predict1)):
    num = round(dfs["Frequency"][i]/100000)
    num = int(num)
    num = float(num/10)
    if(y_predict1[i]==1):
        if(not num in l1):
            l1.append(num)
    if(y_predict3[i]==1):
        if(not num in l3):
            l3.append(num)

Wall time: 7.62 s


In [73]:
l11=[]
l33=[]
for i in range(0,len(y_predict11)):
    num = round(dfs1["Frequency"][i]/100000)
    num = int(num)
    num = float(num/10)
    if(y_predict11[i]==1):
        if(not num in l11):
            l11.append(num)
    if(y_predict33[i]==1):
        if(not num in l33):
            l33.append(num)

In [75]:
print(len(l11))
print(len(l33))
print(len(l1))
print(len(l3))

40
40
40
40


In [56]:
print(l1) # FM Radio stations found using XG Boost
print(l3) # FM Radio stations found using KNN model
print(l11) # FM Radio stations found using XG Boost
print(l33) # FM Radio stations found using KNN model

[89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 89.7, 

In [76]:
#Bay Area FM Radio Station
radio = [87.9, 88.1, 88.5, 89.1, 89.3, 89.5, 89.7, 89.9, 90.1, 90.3, 90.5, 90.7, 91.1, 91.5, 91.7, 92.1, 92.3, 92.7, 93.3, 94.1, 94.5, 94.9, 95.3, 95.7, 96.1, 96.5, 97.3, 98.1, 98.5, 98.9, 99.7, 100.3, 101.3, 101.7, 102.1, 102.9, 103.3, 103.7, 104.5, 104.9, 105.3, 105.7, 106.1, 106.5, 106.9, 107.7]

In [77]:
ctr= 0
for i in range (0,len(l1)):
    if(l1[i] in radio):
        ctr=ctr+1
print("For ")
print("XGBOOST found", len(l1),"radio stations out of which actual Bay Area FM Radio Station are",ctr,"\nAnd",ctr,"stations were found out of",len(radio),"Bay Area FM Radio Station\nXGBOOST accuracy -",ctr/len(l1)*100,"%")
ctr=0
for i in range (0,len(l3)):
    if(l3[i] in radio):
        ctr=ctr+1
print("KNN model found", len(l3),"radio stations out of which actual Bay Area FM Radio Station are",ctr,"\nAnd",ctr,"stations were found out of",len(radio),"Bay Area FM Radio Station\nKNN model accuracy -",ctr/len(l3)*100,"%")

XGBOOST found 40 radio stations out of which actual Bay Area FM Radio Station are 40 
And 40 stations were found out of 46 Bay Area FM Radio Station
XGBOOST accuracy - 100.0 %
KNN model found 40 radio stations out of which actual Bay Area FM Radio Station are 40 
And 40 stations were found out of 46 Bay Area FM Radio Station
KNN model accuracy - 100.0 %


In [78]:
ctr= 0
for i in range (0,len(l11)):
    if(l11[i] in radio):
        ctr=ctr+1
print("XGBOOST found", len(l11),"radio stations out of which actual Bay Area FM Radio Station are",ctr,"\nAnd",ctr,"stations were found out of",len(radio),"Bay Area FM Radio Station\nXGBOOST accuracy -",ctr/len(l11)*100,"%")
ctr=0
for i in range (0,len(l33)):
    if(l33[i] in radio):
        ctr=ctr+1
print("KNN model found", len(l33),"radio stations out of which actual Bay Area FM Radio Station are",ctr,"\nAnd",ctr,"stations were found out of",len(radio),"Bay Area FM Radio Station\nKNN model accuracy -",ctr/len(l33)*100,"%")

XGBOOST found 40 radio stations out of which actual Bay Area FM Radio Station are 40 
And 40 stations were found out of 46 Bay Area FM Radio Station
XGBOOST accuracy - 100.0 %
KNN model found 40 radio stations out of which actual Bay Area FM Radio Station are 40 
And 40 stations were found out of 46 Bay Area FM Radio Station
KNN model accuracy - 100.0 %
