# Analysis of Safecast radiation data

In [None]:
import pandas as pd
import plotly.express as px

## Data loading

### Load the dataset

In [None]:
# Read the raw Safecast measurements from the CSV file.
data = pd.read_csv(filepath_or_buffer='data/safecast.csv')

# `df` is the working frame that the following cells filter and clean;
# leaving it as the last expression displays it as the cell output.
df = pd.DataFrame(data=data)
df

## Data preprocessing

Now we can drop all rows whose measurements use an unexpected unit. Most of the measurements use the `cpm` unit, so we will remove the measurements recorded in any other unit.

In [None]:
# Keep only the measurements whose unit is 'cpm'.
# .copy() makes the result an independent frame rather than a slice of the
# original, so later in-place assignments to `df` do not raise
# SettingWithCopyWarning.
df = df[df['unit'] == 'cpm'].copy()

In [None]:
# Display the frame again to inspect what is left after the unit filter.
df

We can see that the location name is missing for some measurements. We will replace the `NaN` values with the `'Unknown location'` string.

In [None]:
# Replace missing location names with a placeholder label.
# `assign` builds a new frame instead of writing into `df` in place: `df` was
# produced by filtering, and an in-place `.loc` write on such a frame can raise
# SettingWithCopyWarning.
df = df.assign(location_name=df['location_name'].fillna('Unknown location'))

We can convert all `float64` columns to `float32` to reduce memory usage and speed up calculations.

In [None]:
# Show the current dtype of every column before converting anything.
df.dtypes

In [None]:
# Columns that are expected to hold numeric values.
to_convert = ['value', 'device_id', 'height', 'station_id', 'latitude', 'longitude']

# Parse each column as numeric (entries that cannot be parsed become NaN) and
# ask pandas to downcast floating-point columns to the smallest float dtype it
# can use for the values (float32 at minimum).
# The columns are replaced through `assign` rather than `df.loc[:, col] = ...`:
# the `.loc` form writes into the existing column and may keep its old dtype,
# whereas `assign` swaps in the converted column together with its new dtype.
df = df.assign(**{
    col: pd.to_numeric(df[col], errors='coerce', downcast='float')
    for col in to_convert
})

# Show the dtypes again to confirm the result of the conversion.
df.dtypes

## Data visualization

In [None]:
# Draw every measurement on a geographic scatter plot, coloured by its value
# and labelled with its location name on hover. `update_layout` returns the
# figure it was called on, so it can be chained onto the constructor call.
fig = px.scatter_geo(
    data_frame=df,
    lat='latitude',
    lon='longitude',
    color='value',
    hover_name='location_name',
    title='Radiation levels',
    color_continuous_scale=['green', 'yellow', 'red', 'purple'],
).update_layout(height=800)
fig.show()