In [1]:
from typing import Collection
import pandas as pd
import numpy as np
import gspread 
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
from Naked.toolshed.shell import muterun_js, execute_js
from distutils.dir_util import copy_tree
import shutil
# import jsbeautifier
import os
import urllib.request
import requests
import json
import re
import sys
import hashlib
from datetime import datetime
import time

In [37]:
pd.options.display.html.use_mathjax = False

In [2]:
from create_dir import *
from create_content import *
from pytexit import py2tex

UnsupportedOperation: detach

In [50]:
supported_operators = ["**", "/", "*", "+", ">", "<", "="]
supported_word_operators = ["sqrt", "abs", "inf"]
replace = {"⋅" : "*", "−" : "-", "^" : "**", "𝑥" : "x", "𝑎" : "a", "𝑏" : "b", "𝑦" : "y", "–": "-", "≥" : ">=", "≤": "<=", "∪" : "U"}
conditionally_replace = {"[" : "(", "]" : ")"}
regex = re.compile("|".join(map(re.escape, replace.keys())))

#Figure out way to deal with equal signs
def preprocess_text_to_latex(text, tutoring=False, stepMC= False):
    text = str(text)
    text = regex.sub(lambda match: replace[match.group(0)], text)
    if not re.findall("[\[|\(][-\d\s\w]+,[-\d\s\w]+[\)|\]]", text): #Checking to see if there are coordinates/intervals before replacing () with []
        text = regex.sub(lambda match: conditionally_replace[match.group(0)], text)
    
    
    #Account for space in sqrt(x, y)
    text = re.sub(r"sqrt[\s]?\(([^,]+),[\s]+([^\)])\)", r"sqrt(\g<1>,\g<2>)", text)
    text = re.sub(r"sqrt(?:\s*)?\(", r"sqrt(", text)
    text = re.sub(r"abs(?:\s*)?\(", r"abs(", text)
    text = re.sub("\([\s]*([-\d]+)[\s]*,[\s]*([-\d]+)[\s]*\)", "(\g<1>,\g<2>)", text) #To account for coordinates
    for operator in supported_operators:
        text = re.sub("(\s?){0}(\s?)".format(re.escape(operator)), "{0}".format(operator), text)
        
    words = text.split()
    latex = False
    for i in list(range(len(words))):
        word = words[i]
        
        if any([op in word for op in supported_operators]) or any([op in word for op in supported_word_operators]):
            punctuation = re.findall("[\?\.,:]+}$", word) #Capture all the punctuation at the end of the sentence
            if punctuation:
                punctuation = punctuation[0]
            else:
                punctuation = ""
            word = re.sub("[\?\.,:]+}$", "", word)
            try:                
                sides = re.split('(=|U|<=|>=)', word)
                sides = [handle_word(side) for side in sides]
                new_word = ""
                if tutoring and stepMC:
                    new_word = "$$" + "".join(sides) + "$$"
                    #sides = ["$$" + side + "$$" for side in sides] 
                elif tutoring:
                    new_word = "$$" + "".join(sides) + "$$"
                    # new_word = "$$" + "".join([side.replace("\\", "\\\\") for side in sides]) + "$$"
                    #sides = ["$$" + side.replace("\\", "\\\\") + "$$" for side in sides]
                else:
                    new_word = "<InlineMath math=\"" + "".join(sides) + "\"/>"
                    #sides = ["<InlineMath math=\"" + side + "\"/>" for side in sides]
                #new_word = "=".join(sides)
                new_word += punctuation
                latex=True
                words[i] = new_word
                
            except Exception as e:
                print("This failed")
                print(word)
                print(e)
                pass
    text = " ".join(words)
    return text, latex

def handle_word(word):
    latex_dic = {"=": "=", "U": " \cup ", "<=" : " \leq ", ">=" : " \geq "}
    if word in latex_dic:
        return latex_dic[word]
    
    coordinates = re.findall("[\(|\[][-\d\s]+,[-\d\s]+[\)|\]]",word)
    if coordinates:
        word = re.sub("inf", r"\\infty", word)
        return word
    
    word = re.sub("\+/-", "pm(a)", word)
    
    original_word = word
    scientific_notation = re.findall("\(?([\d]{2,})\)?\*([\d]{2,})\*\*", word)
    word = re.sub(":sqrt", ": sqrt", word)
    square_roots = re.findall(r"sqrt\(([^,]*)\,([^\)]*)\)", word)
    word = re.sub(",", "", word)
    for root in square_roots:
        word = re.sub(r"sqrt\("+root[0]+root[1]+"\)", r"sqrt("+root[0]+","+root[1]+")", word)
    #word = re.sub(r"sqrt\(([^,]*)\,([^\)]*)\)", r"sqrt(\g<1>:\g<2>)", "sqrt(2, 3)")
    word = re.sub(r"([\w])(\(+[\w])", "\g<1>*\g<2>", word)
    word = re.sub(r"(\)+)([\w])", "\g<1>*\g<2>", word)
    word = re.sub(r"(\))(\()", "\g<1>*\g<2>", word)
    word = re.sub(r"([0-9]+)([a-zA-Z])", "\g<1>*\g<2>", word)
    #word = re.sub( r"([a-zA-Z])(?=[a-zA-Z])" , r"\1*" , word)
    word = re.sub(r"sqrt\*", r"sqrt", word)
    word = re.sub(r"abs\*", r"abs", word)
    word = re.sub(r"pm\*", r"pm", word)
    word = py2tex(word, simplify_output=False)
    
    #Here do the substitutions for the things that py2tex can't handle
    for item in scientific_notation:
        word = re.sub(item[0] + "\{" + item[1] + "\}", item[0] + "\\\\times {" + item[1] + "}", word)
    word = re.sub(r"\\operatorname{pm}\\left\(a\\right\)(\\times)?", r"\\pm ", word)
    
    return word[2:-2]

In [46]:
path = "../Excel/Book1.xlsx"
df = pd.read_excel(path, "Sheet3", header=0, dtype=str)

In [47]:
df = df[["Problem Name","Row Type","Title","Body Text","Answer", "answerType", "HintID", "Dependency", "mcChoices", "Images (space delimited)","Parent","OER src","openstax KC", "KC","Taxonomy"]]
df = df.astype(str)
df.replace('nan', float(0.0), inplace = True)
df["Body Text"] = df["Body Text"].str.replace("\"", "\\\"")
df["Title"] = df["Title"].str.replace("\"", "\\\"")

In [41]:
df

Unnamed: 0,Problem Name,Row Type,Title,Body Text,Answer,answerType,HintID,Dependency,mcChoices,Images (space delimited),Parent,OER src,openstax KC,KC,Taxonomy
0,complex6,problem,Multiplying a Complex Number by a Real Number,Find the product.,0,0,0,0.0,0,0.0,0.0,http://openstax.org,Multiplying Complex Numbers,Multiplying Complex Numbers,http://openstax.org
1,complex6,step,12(5−2i),,60-24i,mc,0,0.0,60|-24i|60-24i|50-24i,0.0,0.0,0,0,0,0
2,complex6,hint,Using the Distributive Property,The first step is to use the Distributive Prop...,0,0,h1,0.0,0,0.0,0.0,0,0,0,0
3,complex6,scaffold,Rewriting the Expression With the Distributive...,What does the expression look like after it is...,12*5-12*2i,mc,h2,0.0,5-2i|12*5-12*2i|12i|5i,0.0,0.0,0,0,0,0
4,complex6,scaffold,Product of First Term,What is the product of 12*5?,60,numeric,h3,0.0,0,0.0,0.0,0,0,0,0
5,complex6,scaffold,Product of Second Term,What is the product of 12*2i?,24i,mc,h4,0.0,24i|12i|2i|16i,0.0,0.0,0,0,0,0


In [42]:
mc_answers = df.iloc[3]["mcChoices"]
answer = df.iloc[3]["Answer"]

In [43]:
[mc_answer for mc_answer in mc_answers.split("|")]

['5-2i', '12*5-12*2i', '12i', '5i']

In [51]:
preprocess_text_to_latex('12*5-12*2i', True)

<IPython.core.display.Latex object>

$$\left(12\right) \left(5\right)-\left(12\right) \left(2\right) i$$


('$$\\left(12\\right) \\left(5\\right)-\\left(12\\right) \\left(2\\right) i$$',
 True)

In [31]:
[preprocess_text_to_latex(mc_answer, True)[0] for mc_answer in mc_answers.split("|") if mc_answer]

<IPython.core.display.Latex object>

$$\left(12\right) \left(5\right)-\left(12\right) \left(2\right) i$$


['5-2i',
 '$$\\\\left(12\\\\right) \\\\left(5\\\\right)-\\\\left(12\\\\right) \\\\left(2\\\\right) i$$',
 '12i',
 '5i']

In [56]:
text = "i represents sqrt(-1)."
text = re.sub(r"sqrt[\s]?\(([^,]+),[\s]+([^\)])\)", r"sqrt(\g<1>,\g<2>)", text)
text = re.sub(r"sqrt(?:\s*)?\(", r"sqrt(", text)
text

'i represents sqrt(-1).'

In [59]:
supported_operators = ["**", "/", "*", "+", ">", "<", "="]
supported_word_operators = ["sqrt", "abs", "inf"]
words = text.split()
print(words)
for i in list(range(len(words))):
    word = words[i]
    if any([op in word for op in supported_operators]) or any([op in word for op in supported_word_operators]):
        punctuation = re.findall("[\?\.,:]+}$", word) #Capture all the punctuation at the end of the sentence
        print(punctuation)
        if punctuation:
            punctuation = punctuation[0]
        else:
            punctuation = ""


['i', 'represents', 'sqrt(-1).']
[]


In [68]:
re.findall('[?.,:]', 'sqrt(-1).')

['.']

In [70]:
re.findall("[?.,:]+}$", '.,}')

['.,}']

In [21]:
df.iloc[105]['Answer']

'2020-01-16 00:00:00'

In [25]:
li = re.split('-| ', df.iloc[105]['Answer'])

In [27]:
str(int(li[1])) + '/' + str(int(li[2]))

'1/16'

In [48]:
df['Answer']

0                        0
1                  (x+2)/3
2                        0
3                      x+2
4                  (x+2)/3
              ...         
160    2020-07-14 00:00:00
161    2020-02-14 00:00:00
162                      0
163                      9
164    2020-09-14 00:00:00
Name: Answer, Length: 165, dtype: object

In [49]:
df['Answer'].str.len()

0       NaN
1       7.0
2       NaN
3       3.0
4       7.0
       ... 
160    19.0
161    19.0
162     NaN
163     1.0
164    19.0
Name: Answer, Length: 165, dtype: float64

In [39]:
df['Answer']

0      hi
1      hi
2      hi
3      hi
4      hi
       ..
160    hi
161    hi
162    hi
163    hi
164    hi
Name: Answer, Length: 165, dtype: object

In [28]:
questions = [x for _, x in df.groupby(df['Problem Name'])]
problem_name = questions[0].iloc[0]['Problem Name']
tutoring = []
hint_dic = {}
for question in questions:
    for index, row in question.iterrows():
        #checks row type 
        row_type = row['Row Type'].strip().lower()
        if index != 0:
            if row_type == "scaffold":
                scaff_images = ""
                scaff, full_id = create_scaffold("current_step_name", row["HintID"], row["Title"], row["Body Text"], row["answerType"], row["Answer"], row["mcChoices"], row["Dependency"], scaff_images, hint_dic=hint_dic)
                hint_dic[row["HintID"]] = full_id
                tutoring.append(scaff)
#                 previous_tutor = row
#                 previous_images = scaff_images
tutoring

<IPython.core.display.Latex object>

$$\left(12\right) \left(5\right)-\left(12\right) \left(2\right) i$$


<IPython.core.display.Latex object>

$$\left(12\right) \left(5\right)-\left(12\right) \left(2\right) i$$
This failed
12*5?
invalid syntax (<unknown>, line 1)
This failed
12*2i?
invalid syntax (<unknown>, line 1)


['{id: "current_step_name-h2", type: "scaffold", problemType: "MultipleChoice", answerType: "string", hintAnswer: ["12*5-12*2i"], dependencies: [], title: "Rewriting the Expression With the Distributive Property", text: "What does the expression look like after it is rewritten using the Distributive Property?", choices: ["5-2i", "$$\\\\\\\\left(12\\\\\\\\right) \\\\\\\\left(5\\\\\\\\right)-\\\\\\\\left(12\\\\\\\\right) \\\\\\\\left(2\\\\\\\\right) i$$", "12i", "5i"]}',
 '{id: "current_step_name-h3", type: "scaffold", problemType: "TextBox", answerType: "arithmetic", hintAnswer: ["60"], dependencies: [], title: "Product of First Term", text: "What is the product of 12*5?"}',
 '{id: "current_step_name-h4", type: "scaffold", problemType: "MultipleChoice", answerType: "string", hintAnswer: ["24i"], dependencies: [], title: "Product of Second Term", text: "What is the product of 12*2i?", choices: ["24i", "12i", "2i", "16i"]}']

In [None]:
import re, urllib

class Spreadsheet(object):
    def __init__(self, key):
        super(Spreadsheet, self).__init__()
        self.key = key

class Client(object):
    def __init__(self, email, password):
        super(Client, self).__init__()
        self.email = email
        self.password = password

    def _get_auth_token(self, email, password, source, service):
        url = "https://www.google.com/accounts/ClientLogin"
        params = {
            "Email": email, "Passwd": password,
            "service": service,
            "accountType": "HOSTED_OR_GOOGLE",
            "source": source
        }
        req = urllib.Request(url, urllib.urlencode(params))
        return re.findall(r"Auth=(.*)", urllib2.urlopen(req).read())[0]

    def get_auth_token(self):
        source = type(self).__name__
        return self._get_auth_token(self.email, self.password, source, service="wise")

    def download(self, spreadsheet, gid=0, format="csv"):
        url_format = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=%s&exportFormat=%s&gid=%i"
        headers = {
            "Authorization": "GoogleLogin auth=" + self.get_auth_token(),
            "GData-Version": "3.0"
        }
        req = urllib.Request(url_format % (spreadsheet.key, format, gid), headers=headers)
        return urllib.urlopen(req)

if __name__ == "__main__":
    import getpass
    import csv

    email = "tinnaliu@berkeley.edu" # (your email here)
    password = getpass.getpass()
    spreadsheet_id = "1Lp0uGtQsuzxzrm1TSctuZttJRrvaG0E5cwT-75UKZeY" # (spreadsheet id here)

    # Create client and spreadsheet objects
    gs = Client(email, password)
    ss = Spreadsheet(spreadsheet_id)

    # Request a file-like object containing the spreadsheet's contents
    csv_file = gs.download(ss)

    # Parse as CSV and print the rows
    for row in csv.reader(csv_file):
        print(", ".join(row))

In [24]:
scope = ['https://spreadsheets.google.com/feeds'] 
credentials = ServiceAccountCredentials.from_json_keyfile_name('../sunlit-shelter-282118-8847831293f8.json', scope) 
gc = gspread.authorize(credentials)
book = gc.open_by_key('1hOjiUSVAD5RxRB7NoO_8r-XnIerHJ0PWR7GII4n4ALI')
worksheet = book.worksheet('Sheet4') 
table = worksheet.get_all_values()
df = pd.DataFrame(table[1:], columns=table[0])
##Only keep columns we need 
variabilization = 'Variabilization' in df.columns
if variabilization:
    df = df[["Problem Name","Row Type","Variabilization","Title","Body Text","Answer", "answerType", "HintID", "Dependency", "mcChoices", "Images (space delimited)","Parent","OER src","openstax KC", "KC","Taxonomy"]]
else:
    df = df[["Problem Name","Row Type","Title","Body Text","Answer", "answerType", "HintID", "Dependency", "mcChoices", "Images (space delimited)","Parent","OER src","openstax KC", "KC","Taxonomy"]]
df = df.astype(str)
df.replace('', 0.0, inplace = True)
df.replace(' ', 0.0, inplace = True)

In [41]:
error_df = pd.DataFrame(index=range(len(df)), columns=['Check 1', 'Check 2'])
error_df.at[0, 's1'] = 'answer missing'
error_df.at[1, 's1'] = 'answer missing'
error_df.at[0, 'Check 2'] = 'yes'
error_df

Unnamed: 0,Check 1,Check 2,s1
0,,yes,answer missing
1,,,answer missing
2,,,
3,,,
4,,,
5,,,
6,,,
7,,,
8,,,
9,,,


In [21]:
from gspread_dataframe import get_as_dataframe, set_with_dataframe

In [26]:
set_with_dataframe(worksheet, error_df, col=17)

In [46]:
(error_df[error_df['s1'] == 'answer missing'].index & error_df[error_df['Check 2'] == 'yes'].index)[0]

0

In [43]:
error_df[error_df['Check 2'] == 'yes'].index

Int64Index([0], dtype='int64')

In [20]:
# i = "https://imgur.com/7KTfjfm"
i = "https://openstax.org/resources/aef01c7d7904ee664716928aa7b16c58bc6a78e1"
i = "https://ibb.co/zVgQ37c"
r = requests.get(i)

In [21]:
from PIL import Image
from io import BytesIO

Image.open(BytesIO(r.content))

UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fa351dd95e0>

In [19]:
i = "https://imgur.com/7KTfjfm"
urllib.request.urlretrieve(i, "local-filename.jpg")

('local-filename.jpg', <http.client.HTTPMessage at 0x7fa351ddd430>)

In [1]:
import pandas as pd
import numpy as np
import gspread 
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

In [3]:
FEEDBACK_SPREADSHEET = "1PoPG4i_gQy20YdeyYpD5SvjfohbuUwSyLCcNOKDBQ20"
SHEET_NAME = "Selenium Error Log"

In [4]:
scope = ['https://spreadsheets.google.com/feeds'] 
credentials = ServiceAccountCredentials.from_json_keyfile_name('../sunlit-shelter-282118-8847831293f8.json', scope) 
gc = gspread.authorize(credentials)
book = gc.open_by_key(FEEDBACK_SPREADSHEET)
worksheet = book.worksheet(SHEET_NAME) 
table = worksheet.get_all_values()
original_df = pd.DataFrame(table[1:], columns=table[0])

In [10]:
original_df["problem_name"] = original_df["Error Log"].str.split(": ").str[0]
original_df = original_df.set_index("problem_name")
original_df

Unnamed: 0_level_0,Error Log,Issue Type,status,Comment
problem_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
poly31,poly31: Invalid answer for step 1: $$a^4+\left...,frontend,open,Error in console: index.esm.js:964 GET https:/...
rotation18,rotation18: Invalid answer for step 2: (21x'**...,script,resolved,
rotation16,rotation16: Invalid answer for step 1: (3+2sqr...,script,resolved,
Exception on problem matrices14,Exception on problem matrices14: list index ou...,content,open,Matrix answer is not well formatted
partfrac11,partfrac11: Invalid answer for step 1: (1/(x-2...,content,open,
...,...,...,...,...
logarithmic10,logarithmic10: Invalid answer for step 1: $$\f...,script,resolved,
SolveMixture1,"SolveMixture1: Invalid answer for step 1: 42,136",content,open,
expolog11,expolog11: Invalid answer for step 1: @{ans},script,open,
MoApp3,MoApp3: step 5 submit does not exist.,UNKNOWN,open,


In [4]:
df1 = pd.DataFrame([["abc", "bcd", "cde"], 
                    ["def", 'efg', 'ghi']], 
                  columns = ['A', 'B', 'C'])
df1 = df1.set_index('A')
df1

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
abc,bcd,cde
def,efg,ghi


In [5]:
df2 = pd.DataFrame([["hie", "qwe", "ads"], 
                    ["def", 'efg', 'ghi']], 
                  columns = ['A', 'B', 'C'])
df2 = df2.set_index('A')
df2

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
hie,qwe,ads
def,efg,ghi


In [10]:
df1.append(df2.loc[df2.index.difference(df1.index)])

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
abc,bcd,cde
def,efg,ghi
hie,qwe,ads


In [19]:
df1 = pd.DataFrame([["abc", "bcd", "cde"], 
                    ["def", 'efg', 'ghi']], 
                  columns = ['A', 'B', 'C'])
df1

Unnamed: 0,A,B,C
0,abc,bcd,cde
1,def,efg,ghi


In [20]:
df1.append({"A": "nice", "B": "", "C": "q89q"}, ignore_index=True)

Unnamed: 0,A,B,C
0,abc,bcd,cde
1,def,efg,ghi
2,nice,,q89q
