In [1]:
import pandas as pd 

In [2]:
df = pd.read_csv('../data/2018_01_Sites_mobiles_2G_3G_4G_France_metropolitaine_L93.csv', sep=';')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77148 entries, 0 to 77147
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Operateur  77148 non-null  int64  
 1   x          77147 non-null  float64
 2   y          77147 non-null  float64
 3   2G         77148 non-null  int64  
 4   3G         77148 non-null  int64  
 5   4G         77148 non-null  int64  
dtypes: float64(2), int64(4)
memory usage: 3.5 MB


In [4]:
df.rename(columns={'Operateur': 'operateur'}, inplace=True)

In [5]:
df.head()

Unnamed: 0,operateur,x,y,2G,3G,4G
0,20801,102980.0,6847973.0,1,1,0
1,20810,103113.0,6848661.0,1,1,0
2,20820,103114.0,6848664.0,1,1,1
3,20801,112032.0,6840427.0,0,1,1
4,20801,115635.0,6799938.0,1,1,0


In [6]:
df.rename(columns={'Operateur': 'operateur'}, inplace=True)

#### Create a copy for processing 

In [7]:
df_cleaned = df.copy()

#### Check for duplicates entries

In [8]:
duplicates = df_cleaned[df_cleaned.duplicated(subset=['operateur', 'x', 'y', '2G', '3G', '4G'], keep=False)]
duplicates

Unnamed: 0,operateur,x,y,2G,3G,4G
5736,20801,373594.0,6298494.0,0,1,0
5737,20801,373594.0,6298494.0,0,1,0
7081,20801,397239.0,6857986.0,0,0,0
7082,20801,397239.0,6857986.0,0,0,0
8577,20801,418409.0,6428567.0,1,1,1
...,...,...,...,...,...,...
74733,20820,1037869.0,6298619.0,1,1,1
74975,20820,1041366.0,6296402.0,1,1,1
74976,20820,1041366.0,6296402.0,1,1,1
76524,20815,1180107.0,6091654.0,0,1,1


In [9]:
print(f'Records before deleting duplicates: {len(df_cleaned)}')
df_cleaned = df_cleaned.drop_duplicates(subset=['operateur', 'x', 'y', '2G', '3G', '4G'], keep='first')
print(f'Records after deleting duplicates: {len(df_cleaned)}')

Records before deleting duplicates: 77148
Records after deleting duplicates: 77024


#### Operators codes mapping

In [10]:
unique_operateurs = df_cleaned['operateur'].unique()
unique_operateurs

array([20801, 20810, 20820, 20815])

In [11]:
operators_mapping = {
    20801: 'Orange',
    20810: 'SFR',
    20815: 'Free', 
    20820: 'Bouygue'
}

df_cleaned['operator_name'] = df_cleaned['operateur'].map(operators_mapping)

#### Transform to GPS Coordinates 

In [12]:
import pyproj

lambert = pyproj.Proj('+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs')
wgs84 = pyproj.Proj('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs')

df_cleaned['longitude'], df_cleaned['latitude'] = pyproj.transform(lambert, wgs84, df_cleaned['x'], df_cleaned['y'])

  df_cleaned['longitude'], df_cleaned['latitude'] = pyproj.transform(lambert, wgs84, df_cleaned['x'], df_cleaned['y'])


In [13]:
df_cleaned.describe()

Unnamed: 0,operateur,x,y,2G,3G,4G,longitude,latitude
count,77024.0,77023.0,77023.0,77024.0,77024.0,77024.0,77023.0,77023.0
mean,20810.365419,695288.9,6637132.0,0.746611,0.938422,0.658587,2.918664,46.801993
std,7.349988,206867.9,250440.2,0.434954,0.240389,0.474187,2.697567,2.258747
min,20801.0,102980.0,6050021.0,0.0,0.0,0.0,-5.088856,41.364455
25%,20801.0,569705.0,6422107.0,0.0,1.0,0.0,1.333208,44.855818
50%,20810.0,674414.0,6689987.0,1.0,1.0,1.0,2.659307,47.258051
75%,20815.0,854684.0,6859646.0,1.0,1.0,1.0,5.005605,48.827163
max,20820.0,1240585.0,7113682.0,1.0,1.0,1.0,9.550389,51.106451


In [14]:
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 77024 entries, 0 to 77147
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   operateur      77024 non-null  int64  
 1   x              77023 non-null  float64
 2   y              77023 non-null  float64
 3   2G             77024 non-null  int64  
 4   3G             77024 non-null  int64  
 5   4G             77024 non-null  int64  
 6   operator_name  77024 non-null  object 
 7   longitude      77023 non-null  float64
 8   latitude       77023 non-null  float64
dtypes: float64(4), int64(4), object(1)
memory usage: 5.9+ MB


In [49]:
import pyproj

lambert = pyproj.Proj('+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs')
wgs84 = pyproj.Proj('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs')

X = 649384.5
Y = 6861599.47

longitude, latitude = pyproj.transform(lambert, wgs84, X, Y)
print(longitude, latitude)


2.3102149762339614 48.85244604125717


  longitude, latitude = pyproj.transform(lambert, wgs84, X, Y)


In [50]:
import numpy as np
#longitude = 2.879539
#latitude = 42.663836


tolerance = 0.001
results = df_cleaned[
        (np.isclose(df_cleaned.longitude, longitude, atol=tolerance))
        & (np.isclose(df_cleaned.latitude, latitude, atol=tolerance))
    ]

results

Unnamed: 0,operateur,x,y,2G,3G,4G,operator_name,longitude,latitude
30340,20810,649392.0,6861473.0,0,1,1,SFR,2.310332,48.851309


In [56]:
import numpy as np

# Define the target x and y values
target_x = 649384.5
target_y = 6861599.47

# Define the tolerance
tolerance = 300

# Filter the DataFrame to find values close to the target
filtered_df = df_cleaned[
    (np.isclose(df_cleaned['x'], target_x, atol=tolerance)) &
    (np.isclose(df_cleaned['y'], target_y, atol=tolerance))
]

# Print the filtered DataFrame
print(filtered_df)


       operateur         x          y  2G  3G  4G operator_name  longitude  \
30224      20820  649207.0  6861836.0   1   1   1       Bouygue   2.307768   
30234      20801  649228.0  6861416.0   0   0   0        Orange   2.308104   
30239      20820  649232.0  6861481.0   1   1   1       Bouygue   2.308151   
30260      20801  649257.0  6861844.0   1   1   1        Orange   2.308448   
30340      20810  649392.0  6861473.0   0   1   1           SFR   2.310332   

        latitude  
30224  48.854559  
30234  48.850784  
30239  48.851369  
30260  48.854635  
30340  48.851309  


In [48]:
tolerance = 0.01
X_adjusted = 649384.0
Y_adjusted = 6861599.0


results_x = df_cleaned[
    (np.isclose(df_cleaned['x'], X_adjusted, atol=tolerance))
    & (np.isclose(df_cleaned['y'], Y_adjusted, atol=tolerance))
]

results_x

Unnamed: 0,operateur,x,y,2G,3G,4G,operator_name,longitude,latitude


In [30]:
df_cleaned[['x', 'y']].describe()

Unnamed: 0,x,y
count,77023.0,77023.0
mean,695288.9,6637132.0
std,206867.9,250440.2
min,102980.0,6050021.0
25%,569705.0,6422107.0
50%,674414.0,6689987.0
75%,854684.0,6859646.0
max,1240585.0,7113682.0


In [24]:
results_x

Unnamed: 0,operateur,x,y,2G,3G,4G,operator_name,longitude,latitude


In [20]:
df_cleaned.to_csv('../data/network_data_cleaned.csv')