## Moodle Database Schema Understanding

In [54]:
import psycopg2
import pandas as pd

In [95]:
conn = psycopg2.connect(host = '127.0.0.1',
                       database = 'moodle',
                       user = 'postgres',
                       password = 'password')

cur = conn.cursor()

#### The number of tables

In [96]:
cur.execute('SELECT COUNT(*) FROM information_schema.tables')

rows = cur.fetchall()

rows

[(642,)]

#### The number of records in each of the tables given in the MIT section

In [104]:
def table_rows(tables):
    row_num = []
    for table in tables:
        cur.execute('select count(*) from {t}'.format(t=table))
        rows = cur.fetchall()
        row_num.append([table, rows])
    
    tables_df = pd.DataFrame(row_num, columns = ['Table', 'Number of rows'])
    return tables_df

In [106]:
table_rows(['mdl_logstore_standard_log', 'mdl_context', 'mdl_user', 'mdl_course', 'mdl_modules', 'mdl_course_modules', 'mdl_course_modules_completion', 'mdl_grade_items', 'mdl_grade_grades', 'mdl_grade_categories', 'mdl_grade_items_history', 'mdl_grade_grades_history', 'mdl_grade_categories_history', 'mdl_forum', 'mdl_forum_discussions', 'mdl_forum_posts'])

Unnamed: 0,Table,Number of rows
0,mdl_logstore_standard_log,"[(417554,)]"
1,mdl_context,"[(4359,)]"
2,mdl_user,"[(1052,)]"
3,mdl_course,"[(15,)]"
4,mdl_modules,"[(26,)]"
5,mdl_course_modules,"[(290,)]"
6,mdl_course_modules_completion,"[(4483,)]"
7,mdl_grade_items,"[(113,)]"
8,mdl_grade_grades,"[(3643,)]"
9,mdl_grade_categories,"[(16,)]"


#### Number of quiz submissions by hour of day

In [116]:
cur.execute("select count(id), EXTRACT(HOUR FROM to_timestamp(timecreated)) AS HOUR from mdl_logstore_standard_log where action='submitted' AND component='mod_quiz' group by EXTRACT(HOUR FROM to_timestamp(timecreated))")

rows = cur.fetchall()

data = pd.DataFrame(rows, columns = ['Number of quiz submission', 'Hour of day']).set_index('Hour of day')

data

Unnamed: 0_level_0,Number of quiz submission
Hour of day,Unnamed: 1_level_1
0.0,26
1.0,23
2.0,30
3.0,29
4.0,7
5.0,14
6.0,10
7.0,18
8.0,17
9.0,34


#### Monthly usage time of learners who have confirmed and are not deleted

#### Count of log events per user for the following verbs: ['loggedin', 'viewed', 'started', ,'submitted', 'uploaded', 'updated', 'searched', 'resumed', 'answered', 'attempted', 'abandoned']

In [114]:
def log_events(events):
    df = pd.DataFrame(columns = ['userid'])
    for event in events:
        cur.execute("Select userid, count(*) from mdl_logstore_standard_log where action = '{e}' group by userid".format(e=event))
        rows = cur.fetchall()
        df1 = pd.DataFrame(rows, columns = ['userid', event])
        df = df.merge(df1, on = ['userid'], how = 'outer').fillna(0)
        
    events_log = df.copy().set_index('userid')
    return events_log

In [115]:
log_events(['loggedin', 'viewed', 'started', 'submitted', 'uploaded', 'updated', 'searched', 'resumed', 'answered', 'attempted', 'abandoned'])

Unnamed: 0_level_0,loggedin,viewed,started,submitted,uploaded,updated,searched,resumed,answered,attempted,abandoned
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
2,169.0,2492,14.0,21.0,21.0,2551.0,0.0,0.0,0.0,0,5.0
3,107.0,4041,17.0,2.0,6.0,1467.0,0.0,4.0,0.0,0,0.0
5,54.0,694,11.0,5.0,1.0,19.0,0.0,2.0,2.0,0,0.0
7,3.0,81,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1046,0.0,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
1049,0.0,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
1050,0.0,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0
1051,0.0,9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0


#### Python class to pull: overall grade of learners and number of forum posts

In [124]:
class GradeForums:
    def _init_(self):
        '''
        a python class that pulls the overall grade of learners with the grade() method and the number of forum posts with the forums() method.
        both methods return a dataframe with the information pulled fromm the database.
        '''
        
    def grade():
        cur.execute('select userid, sum(finalgrade) from mdl_grade_grades group by userid')
        rows = cur.fetchall()
        df = pd.DataFrame(rows, columns = ['userid', 'overall grade'])
        return df
    
    def forums():
        cur.execute('select count(*) from mdl_forum_posts')
        rows = cur.fetchall()
        df = pd.DataFrame(rows, columns = ['Number of forum posts'])
        return df

In [125]:
gf = GradeForums
gf.grade()

Unnamed: 0,userid,overall grade
0,351,418.00000
1,87,200.00000
2,184,200.00000
3,116,218.66666
4,273,200.00000
...,...,...
402,130,402.00000
403,23,200.00000
404,270,224.00000
405,846,2345.80954


In [126]:
gf.forums()

Unnamed: 0,Number of forum posts
0,131
