In [2]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tkinter import filedialog, Tk
import random 
import plotly.express as px
import os

In [3]:
def add_window_and_flag_rows(df):
    df.columns = df.columns.str.replace('6_', '6.')
    new_cols = []
    for col in df.columns:
        if len(col) < 41:
            col += '_no-flag'
        new_cols.append(col)
    df.columns = new_cols
    window_values = df.columns[1:]
    window_row = ['window'] + [col.split('_')[-2] for col in window_values]
    flag_row = ['flag'] + [col.split('_')[-1] for col in window_values]
    window_df = pd.DataFrame([window_row], columns=df.columns)
    flag_df = pd.DataFrame([flag_row], columns=df.columns)
    
    return pd.concat([window_df, df, flag_df], ignore_index=True)

In [4]:
def open_csv_gui():
  root = Tk()
  root.withdraw()
  
  file_paths = filedialog.askopenfilenames(filetypes=[("CSV files", "*.csv")])
  
  if not file_paths:
    print("Файлы не выбраны. Программа завершена.")
    return
  
  for file_path in file_paths:
    df = pd.read_csv(file_path)
    df_with_window_flag = add_window_and_flag_rows(df)
  return df_with_window_flag

In [5]:
def process_dataframe(df_with_window_flag):
  df_T = df_with_window_flag.transpose()
  df_T.columns = df_T.iloc[0]
  df_T = df_T[1:]
  new_type = input("Введите значение для колонки 'type': ")
  df_T['type'] = new_type
  df_T['CC*'] = df_T['CC*'].str.replace(r'\s*\([^\)]*\)', '', regex=True)
  df_T['Multiplicity'] = df_T['Multiplicity'].str.replace(r'\s*\([^\)]*\)', '', regex=True)
  df_T['SNR'] = df_T['SNR'].str.replace(r'\s*\([^\)]*\)', '', regex=True)  
  df_T['Rsplit (%)'] = df_T['Rsplit (%)'].str.replace(r'\s*\([^\)]*\)', '', regex=True)
  df_T[['Indexed patterns', 'crystals']] = df_T['Indexed patterns/crystals'].str.split('/', expand=True)  
  df_T[['Rfree', 'Rwork']] = df_T['Rfree/Rwork'].str.split('/', expand=True)   
  df_T[['Num. patterns', 'hits']] = df_T['Num. patterns/hits'].str.split('/', expand=True)
  df_T['Indexed crystals/hits'] = df_T['crystals'].astype(float) / df_T['hits'].astype(float)
  return df_T

In [23]:
df_1 = open_csv_gui()
df_2 = open_csv_gui()
df_3 = open_csv_gui()
df_4 = open_csv_gui()
df_5 = open_csv_gui()
df_6 = open_csv_gui()
df_7 = open_csv_gui()

In [24]:
df_1 = process_dataframe(df_1)
df_2 = process_dataframe(df_2)
df_3 = process_dataframe(df_3)
df_4 = process_dataframe(df_4)
df_5 = process_dataframe(df_5)
df_6 = process_dataframe(df_6)
df_7 = process_dataframe(df_7)

In [25]:
df_concat = pd.concat([df_1,df_2,df_3,df_4,df_5,df_6,df_7])

In [26]:
# def plot_custom_scatter(df, chosen_window, y):
#     df_T_win = df[df['window'] == chosen_window]
#     df_T_win[y] = pd.to_numeric(df_T_win[y])

#     df_T_win = df_T_win.sort_values(by=y, ascending=True)
#     color_map = {'flag more than': 'red',
#              'static mask': 'green',
#              'dynamic mask w/o non hr': 'blue', 
#              'reference': 'purple',
#              'dynamic mask with non hr 10': 'orange',
#              'dynamic mask with non hr 15': 'cyan',
#              'dynamic mask with non hr 20': 'pink'}
#     # Задаем порядок флагов на оси X
#     #desired_flag_order = ['2500', '5000', '7500', '10000', '15000', 'no-flag']
#     #fig = px.scatter(df_T_win, x='flag', y=y, color='type', color_discrete_map=color_map)
#     fig = px.strip(df_T_win, x='flag', y=y, color='type', color_discrete_map=color_map, orientation='v')

#     fig.update_layout(
#         title=f"Window {chosen_window}",
#         #xaxis=dict(
#         #    tickvals=list(range(len(desired_flag_order))),
#         #    ticktext=desired_flag_order
#         #)
#     )

#     fig.update_traces(
#         marker_size=15,
#         marker_opacity=0.9
#     )
#     fig.update_traces(jitter=0.2) #
#     fig.update_layout(height = 400, width=700)
#     fig.update_layout(showlegend=True)
#     fig.update_yaxes(tickformat='.4f') 
#     fig.show()



In [27]:
# pd.set_option('display.max_rows', 100) 
# pd.set_option('display.max_columns', 100) 
# df_concat

In [28]:
def plot_custom_scatter(df, chosen_window, y):

  df_T_win = df[df['window'] == chosen_window]
  
  df_T_win[y] = pd.to_numeric(df_T_win[y])
  
  desired_order = ['2500', '5000', '7500', '10000', '15000', 'no-flag']

  df_T_win = df_T_win.loc[df_T_win['flag'].isin(desired_order)]

  df_T_win = df_T_win.sort_index(key=lambda x: x.map(dict(zip(desired_order, range(len(desired_order))))))

  color_map = {'flag more than': 'red', 
              'static mask': 'forestgreen',
              'dynamic mask w/o non hr': 'navy', 
              'reference': 'plum',
              'dynamic mask with non hr 10': 'orange',
              'dynamic mask with non hr 15': 'royalblue',
              'dynamic mask with non hr 20': 'firebrick'}
              
  fig = px.strip(df_T_win, x='flag', y=y, color='type', 
                 color_discrete_map=color_map, orientation='v')
                 
  fig.update_xaxes(categoryorder='array', categoryarray=desired_order)  

  fig.update_layout(title=f"Window {chosen_window}, high-resolution cutoff = 1.5")

  fig.update_traces(marker_size=15, marker_opacity=0.9)

  fig.update_traces(jitter=0)

  fig.update_layout(height = 450, width=650)

  fig.update_layout(showlegend=False)

  fig.update_yaxes(tickformat='.4f')

  fig.show()
  #fig.write_image("{}/{}_{}.png".format('/home/kelmanson/Desktop/pics_for_presentation/', y, chosen_window))

In [29]:
df_concat[df_concat['window'] == '5'][['Rsplit (%)']]

Unnamed: 0_no-flag,Rsplit (%)
FAKP_F2X_chipD_grid_fly_001_window_5_no-flag,10.83
FAKP_F2X_chipD_grid_fly_001_window_5_5000,10.89
FAKP_F2X_chipD_grid_fly_001_window_5_10000,11.1
FAKP_F2X_chipD_grid_fly_001_window_5_2500,10.81
FAKP_F2X_chipD_grid_fly_001_window_5_15000,11.33
FAKP_F2X_chipD_grid_fly_001_window_5_7500,11.09
FAKP_F2X_chipD_grid_fly_001_window_5_no-flag,10.97
FAKP_F2X_chipD_grid_fly_001_window_5_no-flag,10.32
FAKP_F2X_chipD_grid_fly_001_window_5_no-flag,10.53
FAKP_F2X_chipD_grid_fly_001_window_5_no-flag,10.36


In [30]:
number = 5
plot_custom_scatter(df_concat, f'{number}', 'Rsplit (%)')
#plot_custom_scatter(df_concat, f'{number}', 'Rfree')
#plot_custom_scatter(df_concat, f'{number}', 'Rwork')
#plot_custom_scatter(df_concat, f'{number}', 'SNR')




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [47]:
# plot_custom_scatter(df_concat, '1', 'CC*')
# plot_custom_scatter(df_concat, '2', 'CC*')
# plot_custom_scatter(df_concat, '4', 'CC*')
# plot_custom_scatter(df_concat, '5', 'CC*')
# plot_custom_scatter(df_concat, '6.1', 'CC*')
# plot_custom_scatter(df_concat, '6.2', 'CC*')
# plot_custom_scatter(df_concat, '9', 'CC*')

Collecting kaleido
  Using cached kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1
Note: you may need to restart the kernel to use updated packages.
