In [1]:
import pandas as pd
import numpy as np

# Load the raw pro-settings table; every later cell reworks this frame.
df_preprocessed = pd.read_csv(filepath_or_buffer='../data/raw/prosettings.csv')

### Keep the First Value of Comma-Separated Fields (dpi, sens, mouse)

In [2]:
# Some entries hold several comma-separated values; keep only the text
# before the first comma in each of these columns.
for _column in ('dpi', 'sens', 'mouse'):
    df_preprocessed[_column] = df_preprocessed[_column].str.split(',').str[0]

### Drop NaN

In [3]:
# Count the missing values in each column.
df_preprocessed.isnull().sum()

name         0
country      0
mouse       10
dpi          4
sens       101
dtype: int64

In [4]:
# Treat literal '<NA>' strings as missing, then discard rows that have
# no dpi or no mouse value.
df_preprocessed = (
    df_preprocessed
    .replace('<NA>', np.nan)
    .dropna(subset=['dpi', 'mouse'])
)

### Convert to Int and Float

In [5]:
# Parse dpi as a number, then store it as a nullable integer.
# errors='coerce' turns unparseable strings into NaN. The earlier dropna ran
# on the raw strings, so those rows would otherwise survive with a missing
# dpi; drop them here, before the integer cast.
df_preprocessed = (
    df_preprocessed
    .assign(dpi=lambda frame: pd.to_numeric(frame['dpi'], errors='coerce'))
    .dropna(subset=['dpi'])
    .astype({'dpi': 'Int64'})
)

In [6]:
# Parse sens as a float; strings that cannot be parsed become NaN.
df_preprocessed['sens'] = (
    pd.to_numeric(df_preprocessed['sens'], errors='coerce')
    .astype('float64')
)

In [7]:
# Mean sens for each dpi value, used as the fill value for missing sens.
average_sens = df_preprocessed.groupby('dpi')['sens'].mean()

# Fill missing sens with the mean of the row's dpi group, round to three
# decimals, and drop rows for which no group mean was available.
df_preprocessed = (
    df_preprocessed
    .assign(
        sens=lambda frame: frame['sens']
        .fillna(frame['dpi'].map(average_sens))
        .round(3)
    )
    .dropna(subset=['sens'])
)

In [8]:
# Show each column's dtype after the conversions above.
df_preprocessed.dtypes

name        object
country     object
mouse       object
dpi          Int64
sens       float64
dtype: object

In [9]:
# Preview the first five rows of the cleaned table.
df_preprocessed.head(n=5)

Unnamed: 0,name,country,mouse,dpi,sens
0,ScreaM,Belgium,Logitech G Pro X Superlight Black,400,0.725
1,yay,United States,Logitech G Pro X Superlight Black,800,0.27
2,dev1ce,Denmark,Logitech G Pro X Superlight Magenta,400,1.9
3,mL7,Romania,Razer DeathAdder Essential,800,0.617
4,kennyS,France,Razer Deathadder V3 Pro Black,400,2.2


### Save Processed Data

In [10]:
# Write the cleaned table as a comma-separated file with a header row and
# without the index column (separator and header are the to_csv defaults).
df_preprocessed.to_csv(
    '../data/processed/processed_prosettings.csv',
    index=False,
)