# Coursework 1
## Part 4. Conversion calculation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Directory (relative to the notebook's working directory) that the loading
# cell below lists and reads its input files from.
DATA_PATH = "./Data"

In [3]:
# Load every CSV file found in DATA_PATH into a dict keyed by file name.
# Entries that are not CSV files (e.g. ".ipynb_checkpoints", ".DS_Store") are
# skipped so that pd.read_csv is never handed something it cannot parse.
df_dict = {}

for file_ in sorted(os.listdir(DATA_PATH)):
    if not file_.lower().endswith(".csv"):
        continue
    df_dict[file_] = pd.read_csv(os.path.join(DATA_PATH, file_))

# Give the raw tables explicit column names so they share join keys.
df_dict["progresses.csv"].columns = ["progress_id", "student_id", "course_id"]

df_dict["students.csv"].columns = ['id_', 'student_id', 'city', 'birthday']
# Unparseable birthdays become NaT instead of raising.
df_dict["students.csv"]["birthday"] = pd.to_datetime(df_dict["students.csv"]["birthday"], errors="coerce")

df_dict["courses.csv"].columns = ["index", "course_id", "course_title", "course_field"]
df_dict["courses.csv"] = df_dict["courses.csv"].set_index("index")

# Build the main dataset: progresses -> progress phases -> courses -> course
# contents. Outer joins keep rows that have no match on either side.
dataset = (
    df_dict["progresses.csv"]
    .merge(df_dict["progress_phases.csv"], on="progress_id", how="outer")
    .merge(df_dict["courses.csv"], on="course_id", how="outer")
    .merge(df_dict["course_contents.csv"],
           on=["course_id", "lesson_number", "module_number"],
           how="outer")
)

# Parse both timestamp columns the same way: invalid values become NaT and any
# timezone information is dropped so the columns are plain datetime64[ns].
for date_col in ("start_date", "finish_date"):
    dataset[date_col] = (
        pd.to_datetime(dataset[date_col], errors="coerce", utc=False)
        .dt.tz_localize(None)
    )

# Student ages are measured in whole years elapsed up to 2022-01-01.
AGE_REFERENCE_DATE = pd.Timestamp(year=2022, month=1, day=1)
SECONDS_PER_YEAR = 365.2425 * 24 * 60 * 60  # mean Gregorian year

dset_students = df_dict["students.csv"].copy(deep=True)
# pandas >= 2.0 rejects .astype("timedelta64[Y]") (only s/ms/us/ns are
# supported), so the whole-year count is derived from total seconds instead.
# Missing birthdays (NaT) propagate as NaN, keeping the column float64.
dset_students["age_years"] = np.floor(
    (AGE_REFERENCE_DATE - dset_students["birthday"]).dt.total_seconds()
    / SECONDS_PER_YEAR
)

print("Main dataset:", dataset.dtypes, dataset.columns, sep="\n", end="\n\n")
print("Student dataset:", dset_students.dtypes, dset_students.columns, sep="\n")

Main dataset:
progress_id              object
student_id               object
course_id                object
module_number           float64
lesson_number           float64
status                   object
start_date       datetime64[ns]
finish_date      datetime64[ns]
course_title             object
course_field             object
module_title             object
lesson_title             object
lesson_token             object
is_video                 object
is_homework              object
dtype: object
Index(['progress_id', 'student_id', 'course_id', 'module_number',
       'lesson_number', 'status', 'start_date', 'finish_date', 'course_title',
       'course_field', 'module_title', 'lesson_title', 'lesson_token',
       'is_video', 'is_homework'],
      dtype='object')

Student dataset:
id_                    int64
student_id            object
city                  object
birthday      datetime64[ns]
age_years            float64
dtype: object
Index(['id_', 'student_id', 'city', 'birthday', 'age_years'], dtype='object')

### Conversion

Conversion ratio: number of students who started the current module / number of students who completed the previous module

Number of students who started module `module_num` = `start_date.notna().sum()` over the rows of that module<br>
Number of students who completed the previous module = `finish_date.notna().sum()` over the rows of module `module_num - 1`

How to select the start date and the finish date within a module group:

- group the module's rows by `lesson_number`;
- for the start date, take the group with the lowest lesson number;
- for the finish date, take the group with the highest lesson number.

Open question: should conversion also be calculated inside modules that contain several homework assignments?

In [22]:
# Restrict the walk to homework lessons only.
homework_rows = dataset.loc[dataset["is_homework"] == True]

for course_name, course_gr in homework_rows.groupby("course_title"):

    module_groups = course_gr.groupby("module_number")
    modules_list = list(module_groups.groups)

    # mod_num is the 0-based position of the module within the course, while
    # mod_index is the module_number value that keys the group.
    # TODO: nothing is computed per module yet; the loop only leaves the most
    # recently visited group in current_mod.
    for mod_num, (mod_index, current_mod) in enumerate(module_groups):
        pass

# Display the last module group that the loops visited.
current_mod

Unnamed: 0,progress_id,student_id,course_id,module_number,lesson_number,status,start_date,finish_date,course_title,course_field,module_title,lesson_title,lesson_token,is_video,is_homework
15562,aa64ea056fa167da0fbfaa10b5df9799,17c408ab90a10bbe05305778c28630c1,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2018-12-10 08:03:47.852354,2018-12-10 08:03:47.851934,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15563,29858e4a1689aba1d98bf89dbbdcab15,d57d7115cfc1a467f5f74774ab192590,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2018-02-22 14:56:49.591771,2018-02-22 14:56:49.591549,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15564,e1c08aa80c75ef8aef3e5ffd628328ce,bc1106ffe739d61e764d570a3838296f,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,fail,2018-09-18 14:14:46.045637,NaT,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15565,d567303367e2bafdb04312942bdd3005,3cb91f40e08ab0eb5e5de813ada18645,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2019-06-28 08:07:11.529645,2019-07-02 05:05:33.651859,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15566,10f50cd867a6a452ca63118c7fe0a525,53149e25f8fd3609d4d610d59e94078f,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2018-02-20 09:09:49.245138,2018-02-20 09:09:49.247586,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15621,86f0bdcee65804aadc08bf856a575c63,dd3633fc508aab63e251254bf9f69b8d,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2019-04-25 05:22:22.410145,2019-05-21 04:50:49.947282,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15622,8e9d4b3bcbc983abb3a8543b559329fb,25a9dab527913111f2716daccd9c5815,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2018-12-17 10:08:09.747921,2018-12-21 07:08:17.740804,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15623,567b6330c555ecc802de0e51741a50fc,c7623c47650fb1ade9268e2d3835655c,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,done,2018-02-22 07:43:12.110258,2018-02-22 07:43:12.110027,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
15624,b24fc3de3d477f969a2f968f0a75e626,d1121122c6378eeae3e8ab2b57a7fb5c,0770b1b039964228294f1f34b29fc2c1,16.0,8.0,start,2018-03-09 16:33:17.446415,NaT,Руководитель digital-проектов,Business,Контроль. Риски. Поощрения. Наказания. Обратна...,Домашняя работа,55ea3579-6d43-4a30-9dd4-a3640ca823a6,False,True
