# This file contains the code to prepare the CSV document from the question documents that are present in Google Drive.

Each individual question from every `Google Docs` file is extracted and converted into a suitable form to be loaded into the machine learning algorithm.

## Import all the necessary modules

In [1]:
from __future__ import print_function
import csv
from utils.check_credentials import check_cred
from utils.get_drive_contents import get_all_files, get_google_docs_files
from utils.convert_to_google_docs import files_to_google_doc
from utils.get_docs_contents import get_docs_contents
from utils.preprocess_docs_content import preprocess_text
import os.path
from googleapiclient.discovery import build
import json

## Define all the required runtime variables

In [2]:
# Runtime configuration for the rest of the notebook: OAuth scopes and
# credentials, the folder ids to process, and the Google API service clients.

# If modifying these scopes, delete the file token.json.
SCOPES = [
    'https://www.googleapis.com/auth/documents.readonly',
    'https://www.googleapis.com/auth/drive',
]
creds = check_cred(SCOPES)

# constants.json supplies the value stored under each label listed below.
with open('constants.json') as file:
    constants = json.load(file)

# Labels are resolved in the order listed; a label missing from
# constants.json raises KeyError.
folder_ids = [
    constants[label]
    for label in (
        "FALL_2005",
        "FALL_2011",
        "FALL_2013",
        "SPRING_2012",
        "SPRING_2013",
        "SPRING_2014",
        "FALL_2012_FINAL",
        "FALL_2012",
        "FALL_2014",
        "FALL_2015_COMP",
        "FALL_2015_ELE",
        "FALL_2015_ELX",
        "FALL_2015_BE",
        "FALL_2015_CIVIL",
        "FALL_2015_COMMON",
        "FALL_2015_SE",
        "FALL_2015_ARCHITECTURE",
    )
]

# One Docs client plus two Drive clients (API v3 and v2), all sharing the
# same credentials.
docs_service = build('docs', 'v1', credentials=creds)
drive_service = build('drive', 'v3', credentials=creds)
drive_service2 = build('drive', 'v2', credentials=creds)

## Convert all the documents in `.docx` or `.doc` format into `Google Docs` format

In [3]:
# For each configured folder: list its files, then hand that listing to the
# conversion helper together with both Drive clients and the folder id.
for folder_id in folder_ids:
    folder_files = get_all_files(drive_service, folder_id)
    files_to_google_doc(
        drive_service,
        drive_service2,
        folder_files,
        folder_id,
    )

## Pre-process the questions to convert them into `CSV` format.

### Get the `id` and `name` of all the `Google Docs` files

In [4]:
# Append the pre-processed questions of every Google Docs file in every
# configured folder to ./ques/ques.csv.
#
# The file is opened in append mode, so re-running this cell adds the rows
# again rather than replacing them. The encoding is pinned to UTF-8 so that
# non-ASCII question text is written the same way on every platform instead
# of depending on the locale's default encoding.
with open("./ques/ques.csv", "a", newline='', encoding="utf-8") as f:
    # A single writer serves the whole run; it does not need to be rebuilt
    # for each document.
    writer = csv.writer(f)
    for folder_id in folder_ids:
        my_files = get_google_docs_files(drive_service, folder_id)
        for file in my_files:
            # NOTE(review): the return value is discarded and
            # preprocess_text() takes no arguments -- presumably the two
            # helpers communicate through shared state (e.g. an intermediate
            # file); confirm against utils.
            get_docs_contents(docs_service, file['id'], get="text")
            clean_questions = preprocess_text()
            writer.writerows(clean_questions)