In [1]:
import pandas as pd
import numpy as np

In [2]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = 'plotly_white'

In [3]:
path='/mnt/cephfs/ml_data/mc_2021/'
df_train = pd.read_csv(f'{path}processed_data/ProcessedTrainReal/ProcessedTrain.csv.gz')
df_train = df_train[df_train['edepR'] < 17.7]

In [4]:
df_train['cos_theta'] = df_train['edepZ'] / df_train['edepR'] 

In [5]:
df = df_train[['edepR', 'AccumCharge', 'edep']]
df['AverageCharge'] = df['AccumCharge'] / (df['edep'] + 1.022)
df = df[['edepR', 'AverageCharge']]
df.columns = ['R, m', 'Average accumulated charge per MeV, PE/MeV']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['AverageCharge'] = df['AccumCharge'] / (df['edep'] + 1.022)


In [6]:
df = df[df['Average accumulated charge per MeV, PE/MeV'] < 2500]

In [7]:
df

Unnamed: 0,"R, m","Average accumulated charge per MeV, PE/MeV"
0,15.858651,1796.066041
1,16.291675,1533.609202
2,13.867370,1586.945455
3,8.595446,1624.326124
4,17.343020,1540.153440
...,...,...
5014927,16.516622,1676.343501
5014928,16.655182,1424.722259
5014929,15.182683,1777.059916
5014930,11.633990,1671.492506


In [12]:
import plotly.express as px
import datashader as ds

cvs = ds.Canvas(plot_width=150, plot_height=150)
agg = cvs.points(df, 'R, m', 'Average accumulated charge per MeV, PE/MeV')
zero_mask = agg.values == 0
agg.values = np.exp(np.log(agg.values, where=np.logical_not(zero_mask)))
agg.values[zero_mask] = np.nan

xmin = 0
xmax = 18
ymax = 2000
ymin = 1500

xaxis = dict(
    showline=True,
    ticks='outside',
    mirror=True,
    dtick=3,
    range=[0, 18],
    linecolor='black',
    showgrid=True,
    gridcolor='grey',
    gridwidth=0.25,
)

yaxis = dict(
    showline=True,
    ticks='outside',
    mirror=True,
    linecolor='black',
    range=[800, 2200],
    dtick=200,
    showgrid=True,
    gridcolor='grey',
    gridwidth=0.25,
    zeroline=True,
    zerolinecolor='black',
    zerolinewidth=0.25
)

fig = px.imshow(agg, origin='lower')
fig.update_traces(hoverongaps=False)
fig.update_layout(
    coloraxis_colorbar=dict(
        title='Count',
    ),
    xaxis=xaxis,
    yaxis=yaxis,
    font=dict(
        family="Times New Roman",
        size=19,
        color="Black"
    ),
)

fig.show()
pio.write_image(fig, 'plots/mean_charge_vs_R.pdf', width=800, height=500)

In [9]:
df.describe()

Unnamed: 0,"R, m","Average accumulated charge per MeV, PE/MeV"
count,5014917.0,5014917.0
mean,13.27363,1587.711
std,3.426813,181.9848
min,0.06955543,715.7395
25%,11.14838,1491.169
50%,14.04884,1622.277
75%,16.08132,1717.789
max,17.69968,1974.009
