<a href="https://colab.research.google.com/github/IraStreltsova/MissionSquare_Surveys/blob/main/RPS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Constants

# Workbook that is read as input and workbook that receives the results.
RAW_DATA_FILE = 'RPS Survey Results July 2023.xlsx'
RESULTS_FILE = 'RPS_RESULTS.xlsx'

# Answer codes accepted for each metric; rows with any other value are
# filtered out before the metric is computed.
valid_scores = {'NPS': list(range(1, 4)), 'SAT': list(range(1, 6))}

# Column of the raw data that holds the answer for each metric.
columns = {'NPS': 'Q5_NPS_GROUP', 'SAT': 'Q2_5'}

In [None]:
# Load data (no cleaning is applied yet)
def read_and_clean_data(file_path):
  """Load the Excel workbook at ``file_path`` into a DataFrame.

  The frame is returned exactly as ``pd.read_excel`` produced it; no
  cleaning step is applied here.

  Args:
    file_path: Location of the Excel file to read.

  Returns:
    The DataFrame returned by ``pd.read_excel``.
  """
  return pd.read_excel(file_path)

In [None]:
# OVERALL SATISFACTION
def calculate_sat_metrics(df, column_name, valid_scores):
  """Calculate SAT metrics: AVG SCORE, TOP 1 BOX %, TOP 2 BOX %, and n.

  Rows whose value in ``column_name`` is not one of ``valid_scores`` (this
  includes missing values) are dropped before anything is computed.

  Args:
    df: DataFrame holding the survey responses.
    column_name: Name of the column that holds the satisfaction score.
    valid_scores: List-like of score values that count as valid answers.

  Returns:
    A one-row DataFrame with the columns:
      * 'AVG SCORE'   - mean of the valid scores (NaN when there are none),
      * 'TOP 1 BOX %' - share of valid rows scored 5,
      * 'TOP 2 BOX %' - share of valid rows scored 4 or 5,
      * 'n'           - number of valid rows.
    Despite the '%' in the labels, both box values are fractions in the
    range 0-1, not percentages.

  Raises:
    KeyError: If ``column_name`` is not a column of ``df``.
  """
  # Keep only the rows that carry a valid score.
  valid_data = df[df[column_name].isin(valid_scores)]
  scores = valid_data[column_name]

  # Share of each score among the valid rows; a score nobody chose is absent
  # from the index, hence the default of 0.
  shares = scores.value_counts(normalize=True)
  top_1_box = float(shares.get(5, 0))
  top_2_box = top_1_box + float(shares.get(4, 0))

  # Build the result in one step: DataFrame.append was removed in pandas 2.0,
  # so growing an empty frame row by row no longer works.
  return pd.DataFrame(
    [{
      'AVG SCORE': scores.mean(),
      'TOP 1 BOX %': top_1_box,
      'TOP 2 BOX %': top_2_box,
      'n': len(valid_data),
    }],
    columns=['AVG SCORE', 'TOP 1 BOX %', 'TOP 2 BOX %', 'n'],
  )

In [None]:
# NPS
def nps(dataframe, column_name, valid_scores):
  """Calculate the NPS distribution and score from a grouped NPS column.

  The column is expected to hold group codes, read here as 1 = detractor,
  2 = passive and 3 = promoter. Rows whose value is not one of
  ``valid_scores`` (this includes missing values) are dropped first.
  NOTE(review): the output labels mention the 0-6 / 7-8 / 9-10 ranges, so the
  raw 0-10 answers are presumably mapped to these codes upstream - confirm.

  Args:
    dataframe: DataFrame holding the survey responses.
    column_name: Name of the column that holds the NPS group code.
    valid_scores: List-like of group codes that count as valid answers.

  Returns:
    A one-row DataFrame with the columns:
      * 'Detractors (0-6)' - share of valid rows with code 1,
      * 'Passives (7-8)'   - share of valid rows with code 2,
      * 'Promoters (9-10)' - share of valid rows with code 3,
      * 'TOTAL'            - sum of the three shares,
      * 'NPS'              - (promoter share - detractor share) * 100,
      * 'n'                - number of valid rows.
    The three shares and 'TOTAL' are fractions in the range 0-1; only 'NPS'
    is on a 100-point scale.

  Raises:
    KeyError: If ``column_name`` is not a column of ``dataframe``.
  """
  # Keep only the rows that carry a valid group code.
  valid_data = dataframe[dataframe[column_name].isin(valid_scores)]

  # Share of each code among the valid rows; a code nobody chose is absent
  # from the index, hence the default of 0.
  shares = valid_data[column_name].value_counts(normalize=True)
  detractor = float(shares.get(1, 0))
  passive = float(shares.get(2, 0))
  promoter = float(shares.get(3, 0))
  nps_score = (promoter - detractor) * 100

  # Build the result in one step: DataFrame.append was removed in pandas 2.0,
  # so growing an empty frame row by row no longer works.
  return pd.DataFrame(
    [{
      'Detractors (0-6)': detractor,
      'Passives (7-8)': passive,
      'Promoters (9-10)': promoter,
      'TOTAL': detractor + passive + promoter,
      'NPS': nps_score,
      'n': len(valid_data),
    }],
    columns=['Detractors (0-6)', 'Passives (7-8)', 'Promoters (9-10)', 'TOTAL', 'NPS', 'n'],
  )

In [None]:
# EXPORTING DATA TO EXCEL
def export_to_excel(dataframes, file_path):
    """Write every DataFrame in ``dataframes`` to its own sheet of one workbook.

    Args:
        dataframes: Mapping of sheet name to the DataFrame written to that sheet.
        file_path: Location of the Excel workbook to create.
    """
    with pd.ExcelWriter(file_path) as workbook:
        for sheet, frame in dataframes.items():
            # The frame's index is written alongside the data columns.
            frame.to_excel(workbook, sheet_name=sheet, index=True)

In [None]:
def main():
  """Run the pipeline: load the raw survey, compute NPS and SAT, export both.

  Reads ``RAW_DATA_FILE``, computes one result table per metric using the
  module-level ``columns`` and ``valid_scores`` settings, and writes the tables
  to ``RESULTS_FILE`` as the sheets 'NPS' and 'OVERALL_SAT'.
  """
  survey = read_and_clean_data(RAW_DATA_FILE)

  # One entry per output sheet, keyed by sheet name.
  sheets = {
    'NPS': nps(survey, columns['NPS'], valid_scores['NPS']),
    'OVERALL_SAT': calculate_sat_metrics(survey, columns['SAT'], valid_scores['SAT']),
  }

  export_to_excel(sheets, RESULTS_FILE)


In [None]:
# Run the full pipeline only when this code is executed directly, not when it
# is imported from another module.
if __name__ == "__main__":
    main()

  output_df = output_df.append({
  output_df = output_df.append({
