# SIMCCT User Analytics

This Jupyter Notebook contains quick-and-dirty tests of data analytics 
operations on the MongoDB and Redis persistence layers. The purpose is to 
later move these operations into a pipeline and create an endpoint for each 
type of query and analysis that can be done.

In [7]:
# Plotly imports

import chart_studio
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# Chart Studio credentials are read from the environment so that no secret is
# stored in the notebook. PLOTLY_API_KEY is required (a KeyError is raised when
# it is missing); PLOTLY_USERNAME is optional and defaults to the account this
# notebook has always used.
# SECURITY: an API key was previously committed in this cell. Treat it as
# compromised and regenerate it in the Chart Studio account settings.
from os import environ

chart_studio.tools.set_credentials_file(
    username=environ.get('PLOTLY_USERNAME', 'codeninja55'),
    api_key=environ['PLOTLY_API_KEY']
)

# NOTE(review): with these settings every figure uploaded through
# chart_studio is publicly visible. Confirm that is acceptable for plots
# derived from user data before running against a production database.
chart_studio.tools.set_config_file(
    world_readable=True,
    sharing='public'
)

In [8]:
# imports
from os import environ as env
from pymongo import MongoClient
import pandas as pd

In [9]:
# Database and collection that hold the user documents.
db_name = 'arc_dev'
collection = 'users'

# The connection string comes from the MONGO_URI environment variable.
# env.get() returns None when the variable is unset, and that None is handed
# straight to MongoClient (presumably falling back to pymongo's default
# host -- confirm before relying on it).
mongo_uri = env.get('MONGO_URI')
conn = MongoClient(mongo_uri)
db = conn[db_name]

In [10]:
# Echo the database handle; its repr shows the client's host list and the
# selected database name, which confirms what this notebook is connected to.
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'arc_dev')

In [17]:
# Select only documents that have a `profile` field, and leave the `password`
# and `_id` fields out of the returned documents.
profile_filter = {'profile': {'$exists': True}}
excluded_fields = {'password': 0, '_id': False}

cursor = db[collection].find(profile_filter, projection=excluded_fields)

# Materialise the cursor once and build the frame from the resulting records.
df = pd.DataFrame(list(cursor))

In [18]:
# Preview the first rows of the user frame.
# NOTE(review): the rendered output includes names and email addresses --
# clear the cell output before sharing if the data belongs to real users.
df.head()

Unnamed: 0,email,first_name,last_name,profile,admin_profile,saved_alloys,active,admin,disable_admin,verified,created,last_updated,ratings,login_data
0,ironman@avengers.io,Tony,Stark,"{'aim': 'Experimentation', 'highest_education'...",{'position': 'Genius Playboy Billionaire Phila...,"[{'_id': 5d96c2fa1b447aef56bada46, 'name': 'Ar...",True,True,False,True,2019-10-04 03:56:42.610,2019-10-04 03:56:42.610,[],[]
1,black_widow@marvel.io,Natasha,Romanoff,"{'aim': 'Engineering Work', 'highest_education...",,"[{'_id': 5d96c2fb1b447aef56bada4c, 'name': 'Ar...",True,False,False,True,2019-10-04 03:56:42.610,2019-10-04 03:56:42.610,[],[]


In [19]:
# `profile` is an embedded document rather than an array (see the output
# below). MongoDB's $unwind treats a non-array operand as a one-element array,
# so this stage passes such documents through unchanged and drops documents
# whose `profile` is missing, null or an empty array.
unwind_profile = {'$unwind': '$profile'}

# Keep only the `profile` sub-document in each result.
keep_profile_only = {'$project': {'profile': 1, '_id': False}}

pipeline = [unwind_profile, keep_profile_only]

res = db[collection].aggregate(pipeline)
list(res)

[{'profile': {'aim': 'Experimentation',
   'highest_education': 'Masters Degree',
   'sci_tech_exp': 'Advanced',
   'phase_transform_exp': 'Advanced'}},
 {'profile': {'aim': 'Engineering Work',
   'highest_education': 'High School',
   'sci_tech_exp': 'Advanced',
   'phase_transform_exp': 'Beginner'}}]

In [20]:
# Lift each answer out of the embedded `profile` document into a top-level
# field so the result loads into a flat DataFrame with one column per question.
profile_fields = [
    'aim',
    'highest_education',
    'sci_tech_exp',
    'phase_transform_exp',
]

# Builds {'aim': '$profile.aim', ...} in the order listed above, then
# suppresses `_id` as the last key.
flatten_spec = {name: '$profile.' + name for name in profile_fields}
flatten_spec['_id'] = 0

pipeline = [
    {'$unwind': '$profile'},
    {'$project': flatten_spec},
]

res = db[collection].aggregate(pipeline)
profile_df = pd.DataFrame(list(res))
profile_df

Unnamed: 0,aim,highest_education,sci_tech_exp,phase_transform_exp
0,Experimentation,Masters Degree,Advanced,Advanced
1,Engineering Work,High School,Advanced,Beginner


In [39]:
# Distinct answers recorded in the `aim` column.
profile_df['aim'].unique()

array(['Experimentation', 'Engineering Work'], dtype=object)

In [46]:
# Occurrence count of each `aim` answer as a plain list; the answer labels
# held in the index are dropped by list().
# NOTE: value_counts() sorts by descending frequency by default, so these
# counts are not guaranteed to line up with the order returned by unique().
list(profile_df['aim'].value_counts())

[1, 1]

In [47]:
# Build a 2x2 grid of bar charts, one per profile question, showing how many
# users gave each answer.
#
# Labels and counts for a bar chart are both taken from the SAME
# value_counts() result. Pairing unique() (order of first appearance) with
# value_counts() (descending frequency) would attach counts to the wrong
# labels whenever the two orders differ.

# (profile_df column, subplot title), listed in row-major subplot order.
profile_questions = [
    ('aim', 'Aim'),
    ('highest_education', 'Highest Education'),
    ('sci_tech_exp', 'Science Tech. Experience'),
    ('phase_transform_exp', 'Phase Transform Experience'),
]
grid_cols = 2
grid_rows = 2

fig = make_subplots(
    rows=grid_rows,
    cols=grid_cols,
    subplot_titles=[title for _, title in profile_questions]
)

for position, (column, _) in enumerate(profile_questions):
    answer_counts = profile_df[column].value_counts()
    # divmod turns the running position into zero-based (row, col); plotly
    # subplot coordinates are one-based.
    row_idx, col_idx = divmod(position, grid_cols)
    fig.add_trace(
        go.Bar(x=answer_counts.index.tolist(), y=answer_counts.tolist()),
        row=row_idx + 1,
        col=col_idx + 1
    )

fig.update_layout(
    showlegend=False,
    title_text="User Profile Answers"
)

# Uploads the figure to Chart Studio under this filename and renders it inline.
# For a static image instead, plotly.io's write_image(fig, file=...) can be used.
py.iplot(fig, filename='user_profile_aim')