# SuAVE Simple Variable Calculations
### This sample notebook reads the numeric variables from a survey dataset, lets users compute a derived numeric variable, and adds it to a new version of the survey

In [None]:
from IPython.display import HTML

# Page-level toggle for the notebook's code cells: the script hides every
# `div.input` element once the document is ready, and the form button flips
# them between hidden and shown on each click.
_CODE_TOGGLE_MARKUP = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''

# Last expression of the cell, so the markup is rendered as the cell output.
HTML(_CODE_TOGGLE_MARKUP)

### 1. Retrieve survey parameters from the URL

In [None]:
%%javascript
/**
 * Return the decoded value of the query-string parameter `key` from the
 * current page URL, or an empty string when the parameter is absent.
 */
function getQueryStringValue (key)
{
    return unescape(window.location.search.replace(new RegExp("^(?:.*[&\\?]" + escape(key).replace(/[\.\+\*]/g, "\\$&") + "(?:\\=([^&]*))?)?.*$", "i"), "$1"));
}

/**
 * Assign `value` (as a string) to the Python variable `name` in the kernel.
 *
 * The value comes from the page URL and is therefore untrusted. JSON.stringify
 * emits a double-quoted literal with every quote, backslash and control
 * character escaped, and those escapes are also valid in a Python string
 * literal, so the value can neither break the statement nor inject code.
 */
function setKernelVariable (name, value)
{
    IPython.notebook.kernel.execute(name + " = " + JSON.stringify(String(value)));
}

// [Python variable name, query-string parameter] pairs, assigned in this order.
var kernelVariables = [
    ["survey_url", "surveyurl"],
    ["views", "views"],
    ["view", "view"],
    ["user", "user"],
    ["csv_file", "csv"],
    ["dzc_file", "dzc"],
    ["params", "params"],
    ["active_object", "activeobject"]
];

kernelVariables.forEach(function (pair) {
    setKernelVariable(pair[0], getQueryStringValue(pair[1]));
});

// The full address of the notebook page itself.
setKernelVariable("full_notebook_url", window.location);

### 2. Read the survey file and extract numeric variables

In [None]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.linear_model import LinearRegression


absolutePath = "../../temp_csvs/"

# Read the survey CSV whose name arrived in the `csv` URL parameter.
# The context manager closes the file handle once pandas has parsed it
# (the original left the handle open for the life of the kernel).
with open(absolutePath + csv_file, encoding="latin-1") as file:
    df = pd.read_csv(file)

# One-column frame listing every column name of the survey.
variables_df = pd.DataFrame({'varname': df.columns})
print("All variables:")
print(variables_df.varname.values)

# Columns whose name carries the '#number' tag are the numeric variables.
number_columns = [n for n in variables_df.varname.values if '#number' in n]
nums_df = df[number_columns]

# Map the abbreviated name (text before the first '#') to the full column name.
var_list = {n[:n.index('#')]: n for n in number_columns}
print("Number variables: " , var_list)


### 3. Statistical information ##

In [None]:
# Build up to five dropdowns, one per scatter-matrix axis; the k-th dropdown is
# preselected to the k-th numeric variable and labelled "k:".
variable_names = list(var_list.keys())
wlist = [
    widgets.Dropdown(
        options=var_list.keys(),
        value=variable_names[position],
        description=str(position + 1) + ":",
    )
    for position in range(min(5, len(variable_names)))
]

print("Select variables for scatter matrix plotting, then run the next cell")

for dropdown in wlist:
    display(dropdown)

In [None]:
# Scatter matrix of the variables chosen in the dropdowns above.
# Repeated selections are collapsed (first occurrence wins): selecting the same
# variable twice would otherwise yield a frame with duplicate column names,
# which makes the per-column lookups inside the plot ambiguous.
selected = list(dict.fromkeys(dropdown.value for dropdown in wlist))
scat_df = df[[var_list[name] for name in selected]]
print("Scatter matrix with the variables selected")

# `pandas.tools.plotting` no longer exists; `pandas.plotting` is the supported
# home of scatter_matrix.
wot = pd.plotting.scatter_matrix(scat_df, alpha=0.2, figsize=(10, 10), diagonal='kde')

# Relabel every panel with the abbreviated variable names instead of the full
# '#number'-tagged column names.
for row, row_name in enumerate(selected):
    for col, col_name in enumerate(selected):
        wot[row, col].set(xlabel=col_name, ylabel=row_name)

In [None]:
# Two dropdowns for the x and y variables of the regression plot, preselected
# to the first and second numeric variables respectively.
numeric_names = list(var_list.keys())
a = widgets.Dropdown(options=var_list.keys(), value=numeric_names[0], description="x:")
b = widgets.Dropdown(options=var_list.keys(), value=numeric_names[1], description="y:")
ui = widgets.VBox(children=[a, b])


def f(a, b):
    """Return the selected (x, y) pair unchanged."""
    return a, b


print("Select x and y variables for plotting (respectively), then run the next cell")

formula = widgets.interactive_output(f, {'a': a, 'b': b})

display(ui, formula)


In [None]:
# Scatter plot of the two selected variables with a fitted regression line.

# Plain-list copies of the two columns; only the disabled logarithmic fit
# below reads them.
x = df[var_list[a.value]].tolist()
y = df[var_list[b.value]].tolist()

print("Scatterplot with linear regression line and confidence intervals:")

# x, y and data are passed by keyword so the call does not depend on the
# positional order of seaborn's signature, and alpha is a number (the original
# passed the string "0.2", which matplotlib does not accept as an opacity).
ax = sb.regplot(
    x=var_list[a.value],
    y=var_list[b.value],
    data=df,
    scatter_kws={"s": 20, "color": "red", "alpha": 0.2},
)
ax.set(xlabel=a.value, ylabel=b.value)

# Optional logarithmic regression line (disabled):
#fitl = np.polyfit(np.log(x), y, 1).tolist()
#x.sort()
#y1 = [np.log(i)*fitl[0] + fitl[1] for i in x]
#plt.plot(x, y1, "-r")

plt.show()

In [None]:
# Single dropdown choosing the variable to summarise in the next cell.
a = widgets.Dropdown(description="Variable:", options=var_list.keys())
ui = widgets.VBox(children=[a])


def f(a):
    """Return the selected variable name unchanged."""
    return a


print("Select variable for descriptive statistics, then run the next cell")

formula = widgets.interactive_output(f, {'a': a})

display(ui, formula)

In [None]:
# Descriptive statistics and a histogram for the variable picked above.
var = df[var_list[a.value]]
vmean, vsd = var.mean(), var.std()
vskew, vvar = var.skew(), var.var()

# Printed in a fixed order: mean, standard deviation, variance, skew.
for label, value in (
    ("Mean of dataset: ", vmean),
    ("Standard deviation of dataset: ", vsd),
    ("Variance of dataset: ", vvar),
    ("Skew of dataset: ", vskew),
):
    print(label + str(value))

print("Histogram:")
ax = var.hist()
ax.set(xlabel=a.value)
# Dashed red line marks the mean; binding the result to a name keeps the
# returned line object from being echoed as the cell's output.
noout = plt.axvline(vmean, color='red', linestyle='dashed', linewidth=2)

### 4. Define a new variable using the form ##

In [None]:
# Pick the source variable and the derived quantity to compute from it.
a = widgets.Dropdown(options=var_list.keys())
b = widgets.Dropdown(options=['Number of SDs', 'Abs dist from mean'])
ui = widgets.VBox(children=[a, b])


def f(a, b):
    """Return the selected (variable, operation) pair unchanged."""
    return a, b


formula = widgets.interactive_output(f, {'a': a, 'b': b})

display(ui, formula)

print("Select variables and the operation, then run the next cell")

### 5. Edit variable name as needed

In [None]:
def f(Var_Name):
    """Return the entered variable name unchanged."""
    return Var_Name


# Default-name prefix for each operation offered in the previous cell; the
# suggested name is prefix + source variable + '#number'.
default_prefix = {
    "Abs dist from mean": "ADFM on ",
    "Number of SDs": "NSD for ",
}
if b.value in default_prefix:
    newvar = interact(f, Var_Name=default_prefix[b.value] + a.value + '#number')

print("After defining variable name hit Enter, then run the next cell")

### 6. Compute the new variable and format it for SuAVE

In [None]:
# Compute the new variable from the selected source column and store it in
# `df` under the name entered in the previous cell.
source_col = var_list[a.value]
new_col = newvar.widget.result

mean = df[source_col].mean()
std = df[source_col].std()

# Absolute distance of every value from the column mean; missing values stay
# missing instead of raising.
deviation = (df[source_col] - mean).abs()

# The comparison strings must match the dropdown options exactly. The original
# tested 'Num of SDs', which never matched 'Number of SDs', so that choice
# created no column and the cleanup below failed with a KeyError.
if b.value == 'Abs dist from mean':
    derived = deviation
elif b.value == 'Number of SDs':
    # Whole number of standard deviations, rounded up. np.ceil is used because
    # math.ceil raises on NaN.
    derived = np.ceil(deviation / std)
else:
    raise ValueError("Unsupported operation: {!r}".format(b.value))

# Keep the values at full float precision: downcasting to float32 before
# formatting to six decimals would print rounding noise.
df[new_col] = pd.to_numeric(derived, errors='coerce')
print(df[new_col])

# SuAVE '#number' columns are written as fixed six-decimal text. Formatting
# turns missing values into the string 'nan'; the frame-wide replace below
# converts those (and any 'None'/'nan' strings in other columns) to real NaN.
df[new_col] = df[new_col].apply('{:.6f}'.format)
df.replace(['None', 'nan'], np.nan, inplace=True)

### 7. Save the new version of CSV file, and give a name to new survey

In [None]:
# Write the augmented survey into the same directory as the original, with
# '_v1' appended to the file name. The last four characters of the original
# name (presumably the '.csv' extension) are dropped first.
base_name = csv_file[:-4]
new_file = "{}{}_v1.csv".format(absolutePath, base_name)
df.to_csv(new_file, index=None)

In [None]:
#Input survey name

from IPython.display import display

# Text box the user types the survey name into, plus a second box that echoes
# the submitted value.
input_text = widgets.Text()
output_text = widgets.Text()


def bind_input_to_output(sender):
    """Copy the current text of the input box into the output box."""
    output_text.value = input_text.value


# Run the copy whenever the input box is submitted.
input_text.on_submit(bind_input_to_output)

print("Input survey name here, then press Enter:")
for text_box in (input_text, output_text):
    display(text_box)

print('After setting new survey name, run the next cell')

In [None]:
# Read back the name currently held by the echo box and show it.
survey_name = output_text.value
print("Survey name is: {}".format(survey_name))

### 8. Generate the survey and create survey URL

In [None]:
# Derive the upload endpoint from the survey URL: keep everything before the
# first "/main".
upload_url = survey_url.split("/main")[0]

# Only the scheme is inspected. The original tested for "https" anywhere in
# the URL and then removed the first "s" it found, which corrupts the host
# name whenever "https" appears somewhere other than the scheme.
# NOTE(review): HTTPS deployments are addressed over plain HTTP on port 3001,
# presumably where the upload service listens - confirm with the server setup.
https_prefix = "https://"
if upload_url.startswith(https_prefix):
    upload_url = "http://" + upload_url[len(https_prefix):] + ":3001/uploadCSV"
else:
    upload_url = upload_url + "/uploadCSV"

# Everything in the survey URL that precedes the user name; the link to the
# new survey is built on top of this.
new_survey_url_base = survey_url.split(user)[0]

In [None]:
import requests
import re

# Upload the new CSV together with the survey name, the image collection and
# the user. The context manager closes the file once the request completes
# (the original never closed it).
upload_data = {'name': input_text.value, 'dzc': dzc_file, 'user': user}
with open(new_file, "rb") as csv_handle:
    files = {"file": csv_handle}
    r = requests.post(upload_url, files=files, data=upload_data)
print(r.status_code, r.reason)

# Build the link from the uploaded name with every character other than
# letters, digits and underscore replaced by '_'. The original computed this
# sanitised form but then put the raw text into the link, and it also
# overwrote `survey_url`, which broke re-running the URL-parsing cell.
# NOTE(review): presumably the server applies the same substitution to the
# stored file name - confirm.
regex = re.compile('[^0-9a-zA-Z_]')
survey_slug = regex.sub('_', upload_data['name'])

url = new_survey_url_base + user + "_" + survey_slug + ".csv" + "&views=" + views + "&view=" + view
print(url)
print ("Click the URL to open the new survey")
print ("Click the URL to open the new survey")
