In [1]:
import numpy as np
import pandas as pd
from datetime import date


In [2]:
# local connection information
# NOTE(review): local_db is not part of this notebook (presumably a
# project-local module holding the connection settings) -- confirm.
# The returned handle is what pd.read_sql_query receives further down.
import local_db

connection = local_db.connection()


In [3]:
# utility functions
import util


In [4]:
# Date stamp (YYYYMMDD) for this run; it becomes the prefix of the output
# file name at the end of the notebook.
today = date.today()
today_str = f"{today:%Y%m%d}"

print(today_str)


20180524


In [5]:
# Pull the ACC test-score rows for the MATH and ENGL test types.
# Adjacent string literals are joined by the parser, so the resulting SQL
# text is the same single-line statement (including the trailing space).
sql_str = (
    "SELECT PEOPLE_CODE_ID, TEST_ID, TEST_TYPE, "
    "CONVERTED_SCORE, TEST_DATE "
    "FROM TESTSCORES WHERE "
    "TEST_ID = 'ACC' "
    "AND ( TEST_TYPE = 'MATH' "
    "OR TEST_TYPE = 'ENGL' ) "
)
# TEST_DATE is parsed to datetime on load so the .dt accessor works later.
df = pd.read_sql_query(
    sql=sql_str,
    con=connection,
    parse_dates=["TEST_DATE"],
)


In [6]:
# Shape of the raw query result, before any filtering.
print(df.shape)


(4552, 5)


In [None]:
# Preview the first rows of the raw query result.
df.head()


In [7]:
# Drop rows that have no TEST_DATE.
# Take an explicit copy: later cells add columns to this frame, and writing
# into a boolean-mask slice of the query result is what triggers pandas'
# SettingWithCopyWarning.
df = df.loc[df["TEST_DATE"].notna()].copy()


In [8]:
# Translate the TEST_TYPE codes into the test_id labels used in the export.
# Any TEST_TYPE that is not listed here ends up as NaN in test_id.
test_id_by_type = {
    "MATH": "ACCUPLACER_MATH",
    "ENGL": "ACCUPLACER_ENGLISH",
}
df["test_id"] = df["TEST_TYPE"].map(test_id_by_type)


In [9]:
# Integer form of CONVERTED_SCORE.
# Assigning the shortened dropna() result straight back re-aligns it on the
# frame's index, so the dropped rows come back as NaN and the whole column
# is silently upcast to float64 (85 -> 85.0). Casting to the nullable
# "Int64" dtype and re-indexing explicitly keeps real scores integral and
# stores the missing ones as <NA>.
df["numeric_score"] = (
    df["CONVERTED_SCORE"]
    .dropna()
    .apply(np.int64)
    .astype("Int64")
    .reindex(df.index)
)


In [None]:
# Inspect column dtypes and non-null counts after adding numeric_score.
df.info()


In [10]:
# Text form (YYYY-MM-DD) of the test date, as written to the export file.
iso_date_format = "%Y-%m-%d"
df["date_taken"] = df["TEST_DATE"].dt.strftime(iso_date_format)


In [None]:
# Preview the frame with the derived test_id, numeric_score and date_taken columns.
df.head()


In [11]:
# Shape after dropping rows without a TEST_DATE and adding the three derived columns.
print(df.shape)


(4549, 8)


In [12]:
# keep records for active students
# NOTE(review): util.apply_active is defined outside this notebook; what
# counts as "active" and which column it matches on are not visible here --
# confirm against util before relying on the row count below.
df = util.apply_active(in_df=df)


In [13]:
# Shape after the util.apply_active filter.
print(df.shape)


(1037, 8)


In [14]:
# Expose PEOPLE_CODE_ID under the column name used in the export file.
df = df.rename({"PEOPLE_CODE_ID": "student_integration_id"}, axis="columns")


In [15]:
# Keep only the columns that go into the export, in output order.
export_columns = [
    "student_integration_id",
    "test_id",
    "numeric_score",
    "date_taken",
]
df = df[export_columns]


In [16]:
# Keep one row per (student, test): the one with the highest numeric_score.
# Sorting ascending and keeping the last duplicate selects the maximum, but
# sort_values places missing scores last by default, so a record with no
# score would beat a real score for the same student and test.
# na_position="first" sorts missing scores ahead of real ones, so such a
# row only survives when the student has no scored record for that test.
dedupe_keys = ["student_integration_id", "test_id"]
df = df.sort_values(
    dedupe_keys + ["numeric_score"], na_position="first"
).drop_duplicates(dedupe_keys, keep="last")


In [17]:
# Shape after selecting the export columns and de-duplicating per student and test.
print(df.shape)


(1025, 4)


In [18]:
# Write the result as CSV without the index column; the file name is
# prefixed with today's date stamp.
fn_output = f"{today_str}_student_test_results.txt"
df.to_csv(path_or_buf=fn_output, index=False)
