# Loading the questions

We have several kinds of questions; each type is explained below.

## Regular Questions:
    A question that offers a scale of answers, from which you can pick only one.
    For example:
        Q: How satisfied are you so far?
        A:  1. Not so.
            2. Kind Of.
            3. Very.

## Semi Open Questions:
    A question that has multiple answers to select from (you can select only one).
    One of the options lets you enter free text and provide your own answer to the question.

    For example:
        Q: What is your greatest skill?
        A:  1. Thinking outside the box.
            2. Communication.
            3. Hard working.
            4. Other (explain):

## Multiple Options Questions:
    Similar to a regular question, but here you can pick more than one answer.
    For example:
        Q: What are the programming languages that you know (you may pick more than one).
        A:  1. Python.
            2. Java.
            3. C#.
            4. Perl.
            5. Ruby.

## Fully Open Questions:
    In this kind of question you are not given a set of answers to choose from.
    You have to provide your own answer to the question.

    For example:
        Q: Explain why we should hire you.
        A: (can be any answer)

In [1]:
# imports
import pandas as pd
import os
from helper import *

In [2]:
# Load the two survey sheets: the coded answers and the label (text) answers.
# os.path.join builds the paths portably instead of concatenating hard-coded
# Windows '\' separators, so the notebook also runs on Linux / macOS.
seker_dir = os.path.join(os.getcwd(), 'seker')
kod_file = pd.read_excel(os.path.join(seker_dir, 'seker_kod.xlsx'))
label_file = pd.read_excel(os.path.join(seker_dir, 'seker_label.xlsx'))

# Use the first column of each sheet as its row index (the column itself is dropped).
kod_file = kod_file.set_index(keys=kod_file.columns[0], drop=True)
label_file = label_file.set_index(keys=label_file.columns[0], drop=True)

In [3]:
# Pair every question name (a column header of the coded sheet) with its
# question id, which is stored in the first data row of that sheet.
q_names = kod_file.columns.tolist()
q_mezahe = kod_file.iloc[0]
questions = [pair for pair in zip(q_names, q_mezahe)]

all_columns_df = pd.DataFrame(questions, columns=['shem_sheela', 'mezahe_sheela'])

# Keep only the columns that actually carry a question id, and index the
# table by that id while also keeping it as a regular column.
has_mezahe = all_columns_df['mezahe_sheela'].notna()
questions_df = all_columns_df[has_mezahe].set_index(keys='mezahe_sheela', drop=False)

In [4]:
# Keep only the question columns, rename them to their question ids (taken
# from the first row) and then remove that id row from the data.
#
# .copy() makes each frame independent of the raw sheet and the drop is done
# without inplace=True: the original relabelled and dropped in place on a
# column selection of kod_file / label_file, which risks SettingWithCopyWarning.
question_cols = questions_df['shem_sheela'].tolist()

kod_df = kod_file[question_cols].copy()
kod_df.columns = kod_df.iloc[0].tolist()
kod_df = kod_df.drop(index=kod_df.index[0])

label_df = label_file[question_cols].copy()
label_df.columns = label_df.iloc[0].tolist()
label_df = label_df.drop(index=label_df.index[0])

In [5]:
# Find question ids that appear twice in a row: the second occurrence is
# renamed '<id>_dup' and the id is recorded in `semi_open`.
#
# Every id is converted to a string *before* the scan. The original loop
# started at position 1 and converted ids as it went, so the first id kept
# its raw type: it could never compare equal to the (string) second id, and
# the resulting index mixed string and non-string labels.
cols = [str(c) for c in questions_df.index.tolist()]
semi_open = []
for i in range(1, len(cols)):
    if cols[i] == cols[i - 1]:
        cols[i] = cols[i] + '_dup'
        semi_open.append(cols[i - 1])

# Re-label the rows with the string ids, then remove the '_dup' rows so each
# duplicated question is listed once.
questions_df.index = cols
questions_df = questions_df.drop(index=[dup + '_dup' for dup in semi_open])

# Defaults for every question; later cells overwrite kod_sug_sheela
# for the questions they classify.
questions_df['kod_sug_sheela'] = 1
questions_df['kod_skala'] = 0

In [6]:
# Run both sheets through helper.handle_semi_open.
# NOTE(review): presumably this folds each duplicated (semi-open) column pair
# into a single column - confirm against helper.py.
kod_df = handle_semi_open(kod_df)
label_df = handle_semi_open(label_df)

# Join coded and label answers row by row. Merging on an index array (not a
# column name) makes pandas expose the join key as a 'key_0' column; columns
# present in both frames receive the '_kod' / '_label' suffixes.
data = pd.merge(
    left=kod_df,
    right=label_df,
    on=kod_df.index,
    how='inner',
    suffixes=('_kod', '_label'),
)
# Index the merged table by its second column, keeping that column in place.
data = data.set_index(keys=data.columns[1], drop=False)

In [7]:
# Reshape the wide merged table into one long answers table:
# one row per (record id, question id) with the coded and the label answer.
frames = []
for q in questions_df.index.to_list():
    # .copy() yields an independent frame, so adding the question-id column
    # below never writes into a slice of `data` (SettingWithCopyWarning).
    df = data[['key_0', str(q) + '_kod', str(q) + '_label']].copy()
    df['mezahe_sheela'] = q
    df.columns = ['mezahe_reshuma', 'kod_tshuva', 'shem_tshuva', 'mezahe_sheela']
    frames.append(df)

tshuvot = pd.concat(frames)
# Index by (record id, question id); both stay available as columns too.
tshuvot = tshuvot.set_index(keys=['mezahe_reshuma', 'mezahe_sheela'], drop=False)
# Free-text indicator: 0 by default, set to 1 later for free-text answers.
tshuvot['ind_free_text'] = 0

In [8]:
# Open questions: any question having at least one row where the coded answer
# equals the label answer.
kod_equals_label = tshuvot['kod_tshuva'] == tshuvot['shem_tshuva']
sheelot_ptuchut = tshuvot.loc[kod_equals_label, 'mezahe_sheela'].unique().tolist()

# Multiple-option questions: ids made of exactly three dot-separated parts
# (e.g. '22.53.1'), i.e. ids containing exactly two dots.
sheelot_rav_breira = [
    q_id
    for q_id in tshuvot['mezahe_sheela'].unique().tolist()
    if str(q_id).count('.') == 2
]

In [9]:
# Re-code the answers of every open question: each answer gets a running
# number as its code, is flagged as free text, and the question type becomes 3.
open_q = []
for p in sheelot_ptuchut:
    # Boolean mask instead of a string-built query: no quoting problems in the
    # id, and .copy() avoids writing into a slice of `tshuvot`.
    df = tshuvot[tshuvot['mezahe_sheela'] == p].copy()
    df['kod_tshuva'] = range(len(df))
    df['ind_free_text'] = 1
    # Single .loc assignment; the chained form df[col][row] = value is not
    # guaranteed to update questions_df (and never does under Copy-on-Write).
    questions_df.loc[p, 'kod_sug_sheela'] = 3
    open_q.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns and index layout when the survey has no open questions.
open_q_df = pd.concat(open_q) if open_q else tshuvot.iloc[0:0].copy()

In [10]:
# Split the answers of every semi-open question into code, text and a
# free-text flag, and mark the question type as 4.
# NOTE(review): each kod_tshuva / shem_tshuva cell is indexed with [0] and [1],
# so it is assumed to be a two-item pair produced by handle_semi_open - confirm.
semi_q = []
for s in semi_open:
    # .copy() so the column assignments below do not write into a slice of `tshuvot`.
    df = tshuvot[tshuvot['mezahe_sheela'] == s].copy()
    df['kod_tshuva'] = [tshu[0] for tshu in df['kod_tshuva'].tolist()]

    # The second item being the string 'nan' means no free text was given:
    # keep the first item as the answer and leave the flag at 0. Otherwise
    # the second item is the answer and the flag is 1.
    ans = [
        (tshu[1], 1) if tshu[1] != 'nan' else (tshu[0], 0)
        for tshu in df['shem_tshuva'].tolist()
    ]
    tshu_list = [x[0] for x in ans]
    ind_list = [x[1] for x in ans]
    df['ind_free_text'] = ind_list
    df['shem_tshuva'] = tshu_list

    # Single .loc assignment; the chained form df[col][row] = value is not
    # guaranteed to update questions_df (and never does under Copy-on-Write).
    questions_df.loc[s, 'kod_sug_sheela'] = 4
    semi_q.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns and index layout when the survey has no semi-open questions.
semi_q_df = pd.concat(semi_q) if semi_q else tshuvot.iloc[0:0].copy()

In [11]:
def _sheelat_av_of(q_id):
    """Return the parent id of a child question id: its first two dot-separated parts."""
    return '.'.join(q_id.split('.')[0:2])


# parent id -> list of its child question ids (e.g. '22.53' -> ['22.53.1', ...])
rav = {}
for r in sheelot_rav_breira:
    rav.setdefault(_sheelat_av_of(r), []).append(r)
print(rav)

# parent id -> parent question text, computed by helper.get_lcs_list from the
# names of all its child questions.
# NOTE(review): presumably the longest common substring of the names - confirm in helper.py.
sheelot_rav = {
    sheelat_av: get_lcs_list([questions_df['shem_sheela'][sh] for sh in sheelot])
    for sheelat_av, sheelot in rav.items()
}
print(sheelot_rav)

# child id -> answer text: the child's name with the parent question text removed.
sheelot_tshuvut = {
    r: questions_df['shem_sheela'][r].replace(sheelot_rav[_sheelat_av_of(r)], '')
    for r in sheelot_rav_breira
}
print(sheelot_tshuvut)

{'22.53': ['22.53.1', '22.53.2', '22.53.3']}
{'22.53': 'מי מהגורמים חושבים שאתה סתום? '}
{'22.53.1': 'אבא', '22.53.2': 'אמא', '22.53.3': 'חברים'}


In [12]:
# Remove every child question of a multiple-option group from questions_df...
for sheelot in rav.values():
    questions_df = questions_df.drop(sheelot)

# ...and describe each parent question instead: (text, id, type 2, scale 0).
sheelot_av = [(sheelot_rav[sheelat_av], sheelat_av, 2, 0) for sheelat_av in rav]

parent_columns = ['shem_sheela', 'mezahe_sheela', 'kod_sug_sheela', 'kod_skala']
sheelot_av_df = pd.DataFrame(sheelot_av, columns=parent_columns).set_index(
    keys='mezahe_sheela', drop=False
)

# Append the parents and renumber the rows 0..n-1 (the old index is discarded).
questions_df = pd.concat([questions_df, sheelot_av_df]).reset_index(drop=True)

In [13]:
# Turn the per-option child questions of each multiple-option group into
# answers of the parent question.
rav_q = []
for sheelat_av, sheelot in rav.items():
    # .copy() so the edits below act on an independent frame, not on a slice
    # of `tshuvot` (SettingWithCopyWarning).
    temp_df = tshuvot[tshuvot['mezahe_sheela'].isin(sheelot)].copy()
    # The child rows leave the main table; they are re-added under the parent id.
    tshuvot.drop(index=temp_df.index.tolist(), inplace=True)

    # Rows whose code is 0 are discarded.
    # NOTE(review): presumably 0 means the option was not selected - confirm.
    temp_idx = temp_df[temp_df['kod_tshuva'] == 0].index.tolist()
    temp_df = temp_df.drop(index=temp_idx)

    # The answer code is the third part of the child id, the answer text is the
    # text extracted for that child, and the question id becomes the parent id.
    child_ids = temp_df['mezahe_sheela'].tolist()
    temp_df['kod_tshuva'] = [x.split('.')[2] for x in child_ids]
    temp_df['shem_tshuva'] = [sheelot_tshuvut[x] for x in child_ids]
    temp_df['mezahe_sheela'] = sheelat_av

    # Rebuild the (record id, question id) index so it reflects the parent id.
    temp_df = temp_df.reset_index(drop=True)
    temp_df = temp_df.set_index(keys=['mezahe_reshuma', 'mezahe_sheela'], drop=False)
    rav_q.append(temp_df)

# pd.concat raises on an empty list, so fall back to an empty frame with the
# same columns and index layout when there are no multiple-option questions.
rav_q_df = pd.concat(rav_q) if rav_q else tshuvot.iloc[0:0].copy()

In [14]:
# Take the original rows of the open and semi-open questions out of the main
# table, then add back their re-coded versions together with the
# multiple-option answers, and sort the result by its index.
remaining = (
    tshuvot
    .drop(index=open_q_df.index.tolist())
    .drop(index=semi_q_df.index.tolist())
)
tshuvot = pd.concat([remaining, open_q_df, semi_q_df, rav_q_df]).sort_index()

In [15]:
# Display the final long-format answers table (one row per record and question).
tshuvot

Unnamed: 0_level_0,Unnamed: 1_level_0,mezahe_reshuma,kod_tshuva,shem_tshuva,mezahe_sheela,ind_free_text
mezahe_reshuma,mezahe_sheela,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
123456.0,2.3,123456.0,2.0,השאר,2.3,0
123456.0,13.37,123456.0,4.0,הייתי מנסה ללמוד יותר בבית,13.37,1
123456.0,14.39,123456.0,0.0,יש מלא דברים,14.39,1
123456.0,22.53,123456.0,1.0,אבא,22.53,0
123456.0,22.53,123456.0,3.0,חברים,22.53,0
123456.0,3.4,123456.0,5.0,במידה רבה מאוד,3.4,0
123456.0,4.4,123456.0,3.0,במידה בינונית,4.4,0
222222.0,2.3,222222.0,1.0,קצינים,2.3,0
222222.0,13.37,222222.0,2.0,קורא ספרים,13.37,0
222222.0,14.39,222222.0,1.0,שולחנות לא נקיים,14.39,1


In [16]:
# Display the final questions table, including the question type assigned to each question.
questions_df

Unnamed: 0,shem_sheela,mezahe_sheela,kod_sug_sheela,kod_skala
0,מיפוי,2.3,1,0
1,באיזה מידה את\ה מרוצה משירותך בצבא?,3.4,1,0
2,באיזה מידה את\ה סתום?,4.4,1,0
3,כיצד היית הופך אותך לפחות סתום?,13.37,4,0
4,ממה אתה לא מרוצה? פרט\י,14.39,3,0
5,מי מהגורמים חושבים שאתה סתום?,22.53,2,0
