In [5]:
#Import packages for data cleaning, analysis, and visualization
import numpy as np
import math
from collections import Counter
import requests

%matplotlib inline
import matplotlib.pyplot as pl
import matplotlib.pylab as pylab
from matplotlib.patches import Polygon

import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import cufflinks as cf

import seaborn
import statsmodels.formula.api as sm
from sklearn.linear_model import LinearRegression
import scipy, scipy.stats
import pandas as pd

from geopy.geocoders import GoogleV3

from simple_salesforce import Salesforce

In [6]:
#Connect to Salesforce API
with open('SalesforceAPI.txt') as file:
    username, password, token = [x.strip("\n") for x in file.readlines()]
sf = Salesforce(username=username, password=password, security_token=token)
        

In [7]:
opportunities = sf.query("SELECT Opportunity.Account.MasterRecordID, Opportunity.Account.Name, Group_ID__c, Name, Opportunity.Account.Industry__c, CloseDate, Month__c, Amount, Skier_Count__c, Skier_Days__c, Lessons__c, Rentals__c, No_of_Days__c, Transportation__c, Opportunity.Account.BillingStreet, Opportunity.Account.BillingCity, Opportunity.Account.BillingPostalCode, Opportunity.Account.BillingState, (SELECT Contact.Name,Contact.Email, Contact.Id, Contact.MailingStreet, Contact.MailingCity, Contact.MailingPostalCode, Contact.MailingState, OpportunityId, Role from OpportunityContactRoles where IsPrimary=True) FROM Opportunity")   


In [8]:
#Extract Account ID
Account_IDs = [x['Account']['MasterRecordId'] for x in opportunities["records"]]
Account_IDs_counts = Counter(Account_IDs)

#Extract Account Name
Account_names = [x['Account']['Name'] for x in opportunities["records"]]
Account_names_counts = Counter(Account_names)

#Extract Group IDs
Group_IDs = [x['Group_ID__c'] for x in opportunities["records"]]
Group_IDs_counts = Counter(Group_IDs)

#Extract Opportunity/Group Names
Group_names = [x['Name'] for x in opportunities["records"]]
Group_names_counts = Counter(Group_names)

#Extract Industry Type
Industries = [x['Account']['Industry__c'] for x in opportunities["records"]]
Industries_counts = Counter(Industries)

#Extract 1st Ski Date
First_day = [x['CloseDate'] for x in opportunities["records"]]
First_day_counts = Counter(First_day)

#Extract Visit Period Month
Months = [x['Month__c'] for x in opportunities["records"]]
Months_counts = Counter(Months)

#Extract Group Revenue
Revenue = [x['Amount'] for x in opportunities["records"]]
Revenue_counts = Counter(Revenue)

#Extract Skier Count
Skier_number = [x['Skier_Count__c'] for x in opportunities["records"]]
Skier_number_counts = Counter(Skier_number)

#Extract Skier Days
Skier_days = [x['Skier_Days__c'] for x in opportunities["records"]]
Skier_days_counts = Counter(Skier_days)

#Extract Lessons Binary: Yes or No (1 or 0)
Lessons = [x['Lessons__c'] for x in opportunities["records"]]
Lessons_counts = Counter(Lessons)

#Extract Rentals Binary: Yes or No (1 or 0)
Rentals = [x['Rentals__c'] for x in opportunities["records"]]
Rentals_counts = Counter(Rentals)

#Extract Visit Duration (Number of Days)
Duration = [x['No_of_Days__c'] for x in opportunities["records"]]
Duration_counts = Counter(Duration)

#Extract Transportation Mode (Cars, Bus, Vans)
Transportation = [x['Transportation__c'] for x in opportunities["records"]]
Duration_counts = Counter(Duration)

#Extract Billing Address
Address = [x['Account']['BillingStreet'] for x in opportunities["records"]]
Address_counts = Counter(Address)

#Extract Billing City
Cities = [x['Account']['BillingCity'] for x in opportunities["records"]]
Cities_counts = Counter(Cities)

#Extract Billing Zip Code
Zipcodes = [x['Account']['BillingPostalCode'] for x in opportunities["records"]]
Zipcodes_counts = Counter(Zipcodes)

#Extract Billing State
States = [x['Account']['BillingState'] for x in opportunities["records"]]
States_counts = Counter(States)

#Extract Group Leader Name
Groupleaders = []
for x in opportunities["records"]:
    if x["OpportunityContactRoles"] is not None:
        Groupleaders.append(x["OpportunityContactRoles"]["records"][0]["Contact"]["Name"])
    else:
        Groupleaders.append(None)
        
#Extract Group Leader Mailing Street
GL_streets = []
for x in opportunities["records"]:
    if x["OpportunityContactRoles"] is not None:
        GL_streets.append(x["OpportunityContactRoles"]["records"][0]["Contact"]["MailingStreet"])
    else:
        GL_streets.append(None)
        
#Extract Group Leader Mailing City
GL_cities = []
for x in opportunities["records"]:
    if x["OpportunityContactRoles"] is not None:
        GL_cities.append(x["OpportunityContactRoles"]["records"][0]["Contact"]["MailingCity"])
    else:
        GL_cities.append(None)
        
#Extract Group Leader Mailing Postal Code
GL_zipcode = []
for x in opportunities["records"]:
    if x["OpportunityContactRoles"] is not None:
        GL_zipcode.append(x["OpportunityContactRoles"]["records"][0]["Contact"]["MailingPostalCode"])
    else:
        GL_zipcode.append(None)
        
#Extract Group Leader Mailing State
GL_states = []
for x in opportunities["records"]:
    if x["OpportunityContactRoles"] is not None:
        GL_states.append(x["OpportunityContactRoles"]["records"][0]["Contact"]["MailingState"])
    else:
        GL_states.append(None)

In [9]:
#FINISH BUILDING DATAFRAME
Opportunities_df = pd.DataFrame({'AccountID':Account_IDs, 'AccountName': Account_names, 'GroupID':Group_IDs, 
                                 'GroupName': Group_names, 'GroupLeader':Groupleaders, 'LeaderStreet':GL_streets, 'LeaderCity': GL_cities, 'LeaderState': GL_states, 'LeaderZipcode': GL_zipcode, 'Industry':Industries, 'ArrivalDate':First_day,
                                 'Month':Months, 'Revenue': Revenue, 'SkierCount':Skier_number, 'SkierDays': Skier_days, 'Lesson':Lessons,
                                 'Rentals':Rentals, 'Duration':Duration, 'Transportation':Transportation,'Address':Address,
                                 'City':Cities, 'Zipcode': Zipcodes, 'State':States})

In [10]:
#Data Cleaning
clean_opps = Opportunities_df
clean_opps['ArrivalDate'] = pd.to_datetime(clean_opps['ArrivalDate'])
clean_opps['VisitCount_AllTIme']= clean_opps.groupby(['AccountName'])['ArrivalDate'].transform('count')
num_records = len(clean_opps)
clean_opps.to_csv('/Users/AnthonyAbercrombie/projects/Monarch_Salesforce/clean_opps.csv', encoding='utf-8')

In [11]:
#Splice dataframe for 2015-2016 Ski Season and clean_opps_2014-2015
clean_opps = clean_opps.sort_values(by='ArrivalDate',ascending=False)
clean_opps_dateindex = clean_opps.set_index('ArrivalDate')
clean_opps_2015to2016 = clean_opps_dateindex['2016-04-10':'2015-11-26']
clean_opps_2014to2015 = clean_opps_dateindex['2015-04-10':'2014-11-01']

#Sort by Account Name so we can clusters of multi-visit accounts together.
clean_opps_2015to2016_acct_cluster = clean_opps_2015to2016.sort_values(by='AccountName')
clean_opps_2014to2015_acct_cluster = clean_opps_2014to2015.sort_values(by='AccountName')

#Reset Index for easier data analysis
clean_opps_2015to2016_acct_cluster_noindex = clean_opps_2015to2016_acct_cluster.reset_index()
clean_opps_2014to2015_acct_cluster_noindex = clean_opps_2014to2015_acct_cluster.reset_index()

#View for easily printing address labels and Group Leader names, which are used for mailing purposes. Will merge with Account_names_counts_df
GL_addresses = clean_opps_2015to2016_acct_cluster_noindex[['AccountName', 'GroupName','ArrivalDate','Industry','GroupLeader','LeaderStreet','LeaderCity','LeaderZipcode','LeaderState']]

#Add column that describes the number of times a group account has visited Monarch in a season.
GL_addresses['VisitCount_2015to2016']=GL_addresses.groupby(['AccountName'])['ArrivalDate'].transform('count')
clean_opps_2015to2016_acct_cluster_noindex['VisitCount_2015to2016']= clean_opps_2015to2016_acct_cluster_noindex.groupby(['AccountName'])['City'].transform('count')
clean_opps_2014to2015_acct_cluster_noindex['VisitCount_2014to2015']= clean_opps_2014to2015_acct_cluster_noindex.groupby(['AccountName'])['City'].transform('count')

#write to CSV
GL_addresses.to_csv('/Users/AnthonyAbercrombie/projects/Monarch_Salesforce/labels4.csv',encoding='utf-8')
clean_opps_2015to2016 = clean_opps_2015to2016_acct_cluster_noindex
clean_opps_2014to2015 = clean_opps_2014to2015_acct_cluster_noindex



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [12]:
#Retrieve Latitude and Longitude from Google Maps API. In this instance, we're georeferencing the group leader's mailing address.
geolocator = GoogleV3()
GoogleAPI_key = 'AIzaSyD5V40uuQ3HlwMGHSAuj72wmIwnoGjuXYQ'
geolocator = GoogleV3(GoogleAPI_key)

addresses = clean_opps.loc[:,['LeaderStreet','LeaderCity', 'LeaderZipcode','LeaderState']]

addresses['GoogleQuery'] = addresses.LeaderStreet.str.cat([addresses.LeaderCity,
                                              addresses.LeaderZipcode,
                                              addresses.LeaderState],
                                              sep=' ')
addresses['GoogleQuery']

locations =[geolocator.geocode(addr, timeout=90) for addr in addresses.GoogleQuery]

In [13]:
#Append Latitude and Longitude to new data frame. New dataframe ommits records with invalid addresses.
geo_info = pd.DataFrame(
    [(clean_opps['AccountName'][addr],
      clean_opps['GroupName'][addr],
      clean_opps['GroupLeader'][addr],
      clean_opps['Revenue'][addr], clean_opps['SkierCount'][addr],
      clean_opps['SkierDays'][addr],clean_opps['ArrivalDate'][addr],
      clean_opps['VisitCount_AllTIme'][addr],
      clean_opps['Industry'][addr],
      clean_opps['Transportation'][addr],
      clean_opps['LeaderStreet'][addr],
      clean_opps['LeaderCity'][addr],
      clean_opps['LeaderZipcode'][addr],
      clean_opps['LeaderState'][addr],
      locations[addr].address,
      locations[addr].latitude, locations[addr].longitude)
      for addr in range(num_records) if locations[addr] is not None],
    columns=['AccountName','OpportunityName', 'GroupLeader',
             'Revenue','SkierCount',
             'SkierDays', 'ArrivalDate', 'VisitCount_AllTime','Industry', 'Transportation',
             'LeaderStreet','LeaderCity','LeaderZipcode','LeaderState',
             'GoogleSearchAddress',
             'latitude','longitude'])

In [14]:
#Splice Geodataframe for 2014-2015 and 2015-2016 Ski Season
geo_info = geo_info.sort_values(by='ArrivalDate',ascending=False)
geo_info_dateindex = geo_info.set_index('ArrivalDate')
geo_info_2015to2016 = geo_info_dateindex['2016-04-10':'2015-11-26']
geo_info_2014to2015 = geo_info_dateindex['2015-04-10': '2014-11-01']

#Sort by Account Name so we can clusters of multi-visit accounts together.
geo_info_2015to2016_acct_cluster = geo_info_2015to2016.sort_values(by='AccountName')
geo_info_2014to2015_acct_cluster = geo_info_2014to2015.sort_values(by='AccountName')

#Reset Index and rename for easier data analysis
GS_2015to2016 = geo_info_2015to2016_acct_cluster.reset_index()
GS_2014to2015 = geo_info_2014to2015_acct_cluster.reset_index()

#Visit Count for 2015 to 2016 Ski Season
GS_2015to2016['VisitCount_2015to2016']= GL_addresses.groupby(['AccountName'])['ArrivalDate'].transform('count')
GS_2014to2015['VisitCount_2014to2015']= GL_addresses.groupby(['AccountName'])['ArrivalDate'].transform('count')

In [15]:
#Number of distinct accounts (group entities) that visited the mountain by season
account_count_2015to2016 =pd.value_counts(clean_opps_2015to2016['AccountName'].values, sort=False)
account_count_2014to2015 =pd.value_counts(clean_opps_2014to2015['AccountName'].values, sort=False)
len(account_count_2014to2015)

223

In [21]:
clean_opps_2014to2015.head()

Unnamed: 0,ArrivalDate,AccountID,AccountName,Address,City,Duration,GroupID,GroupLeader,GroupName,Industry,...,Month,Rentals,Revenue,SkierCount,SkierDays,State,Transportation,Zipcode,VisitCount_AllTIme,VisitCount_2014to2015
0,2015-01-16,,Achgill International Students - Bryan,700 E 27th St.,Bryan,2,GP088-15,Suzanne Achgill,Achgill International Students January 2015,Higher Education,...,January,True,2003.0,16,30.0,TX,Cars,77803,2,1
1,2015-02-28,,Albuquerque Ski Club,1546 Rosalba St. NE,Albuquerque,2,GP153-15,Paula,Albuquerque Ski Club February 2015,Club,...,February,False,785.0,21,38.0,NM,Bus,87112,2,1
2,2014-12-07,,American Heritage Girls (AHG) - Colorado Springs,6471 Stella Luna Drive,Colorado Springs,1,GP038-15,Lisa Filsell,AHG Colorado Springs December 2014,Club,...,December,True,,10,,CO,Cars,80923,1,1
3,2015-03-21,,Arkansas Valley Regional Medical Center - La J...,1513 Carson Ave.,La Junta,1,GP175-15,Denise Balicki,AVRMC Radiology La Junta March 2015,Healthcare,...,March,False,683.0,13,13.0,CO,Cars,81050,1,1
4,2015-03-08,,Arlington Community Church - Arlington,1715 W. Randol Mill Rd,Arlington,3,GP109-15,Tim Nordskog,Arlington Community Church March 2015,Church,...,March,True,3536.0,14,44.0,TX,Vans,76012,1,1


In [16]:
#2014-2015 and 2015-2016 High Level Comparison

Groupnames_2014to2015 = clean_opps_2014to2015[['AccountName', 'VisitCount_2014to2015']]
Groupnames_2015to2016 = clean_opps_2015to2016[['AccountName', 'VisitCount_2015to2016']]
Visits_2014to2015 = Groupnames_2014to2015.drop_duplicates(subset='AccountName')['VisitCount_2014to2015'].tolist()
Visits_2015to2016 = Groupnames_2015to2016.drop_duplicates(subset='AccountName')['VisitCount_2015to2016'].tolist()

Skierdays_2014to2015 = clean_opps_2014to2015['SkierDays'].tolist()
Skierdays_2015to2016 = clean_opps_2015to2016['SkierDays'].tolist()

Revenue_2014to2015 = clean_opps_2014to2015['Revenue'].tolist()
Revenue_2015to2016 = clean_opps_2015to2016['Revenue'].tolist()

AlltimeVisits_2015to2016groups = clean_opps_2015to2016[['VisitCount_AllTIme', 'VisitCount_2015to2016']]


RepeatGroups_Instances_2014to2016 = [['AccountName', 'Revenue','SkierDays','VisitCount_2015to2016']]
for index, row in clean_opps_2015to2016.iterrows():
    if row['VisitCount_AllTIme' ] != row['VisitCount_2015to2016']:
        RepeatGroups_Instances_2014to2016.append([row['AccountName'], row['Revenue'], row['SkierDays'],
                                                 row['VisitCount_2015to2016']])
    else:
        pass
    

#RepeatGroups = list(set(RepeatGroups_Instances_2014to2016))
headers = RepeatGroups_Instances_2014to2016.pop(0)
RepeatGroups_Instances_2014to2016_df = pd.DataFrame(RepeatGroups_Instances_2014to2016, columns=headers)

RepeatGroups_Instances_2014to2016_df = pd.merge(RepeatGroups_Instances_2014to2016_df, clean_opps_2014to2015[['AccountName','VisitCount_2014to2015','VisitCount_AllTIme']], on= 'AccountName',how='left').dropna()
RepeatGroups_Instances_2014to2016_df = RepeatGroups_Instances_2014to2016_df.drop_duplicates()

#RepeatGroups_Instances_grouped = RepeatGroups_Instances_2014to2016_df.groupby('AccountName').sum()
RepeatGroups_Instances_grouped = RepeatGroups_Instances_2014to2016_df.groupby(['AccountName','VisitCount_2015to2016','VisitCount_2014to2015','VisitCount_AllTIme']).agg({'Revenue': np.sum,'SkierDays':np.sum})
#RepeatGroups_Instances_grouped['VisitCount_2015to2016'] = RepeatGroups_Instances_grouped['VisitCount_2015to2016'].apply(np.sqrt)
#RepeatGroups_Instances_grouped['VisitCount_2014to2015'] = RepeatGroups_Instances_grouped['VisitCount_2014to2015'].apply(np.sqrt)


In [17]:
#RepeatGroups_Instances_grouped.iloc[60:90,]
#RepeatGroups_Instances_2014to2016_df.iloc[:20,]
#pd.merge(RepeatGroups_Instances_2014to2016_df, clean_opps_2014to2015[['AccountName','VisitCount_2014to2015','VisitCount_AllTIme']], on= 'AccountName',how='left').dropna()
#RepeatGroups_Instances_2014to2016_df

RepeatGroups_Instances_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SkierDays,Revenue
AccountName,VisitCount_2015to2016,VisitCount_2014to2015,VisitCount_AllTIme,Unnamed: 4_level_1,Unnamed: 5_level_1
"Achgill International Students - Bryan, TX",1,1,2,18,2180.00
Albuquerque Ski Club,1,1,2,38,1028.00
"Austin Oaks Church - Austin, TX",1,1,2,18,1346.00
"Ayres Family - Overland Park, KS",1,1,2,32,2345.00
"BSA Troop 108 - Holcomb, KS",1,1,2,28,1550.00
BSA Troop 220 - Colorado Springs,1,1,2,17,777.00
BSA Troop 268 - Colorado Springs,1,1,2,24,784.00
BSA Troop 62 - Colorado Springs,1,1,2,60,1888.00
BSA Troops 8 & 9 - Pueblo,1,1,2,25,1701.00
"Bair Hershey Families - Culberston, NE",1,1,2,19,602.00


In [20]:
#Plot of 2-year overview comparision
trace0 = go.Bar(
    x=[np.nansum(Skierdays_2014to2015),np.nansum(Skierdays_2015to2016)],
    y= ['2014 - 2015 \n Skier Days','2015 - 2016 \n Skier Days'],
    marker=dict(
        color='rgba(144,87,180,0.6)',
        line=dict(
        color='rgba(144,87,180,1.0)',
        width=0.5,
        ),
    ),
    name='Skier Days',
    orientation='h',
)

trace1 = go.Bar(
    x=[np.nansum(Revenue_2014to2015),np.nansum(Revenue_2015to2016)],
    y=['2014 - 2015 \n Revenue', '2015 - 2016 \n Revenue'],
    marker=dict(
        color = 'rgba(87,180,144,0.6)',
        line = dict(
            color='rgba(87,180,144,1.0)',
            width=0.5,
        ),
    ),
    name = 'Gross Revenue',
    orientation='h'
)

trace2 = go.Bar(
    x=[np.nansum(Visits_2014to2015),np.nansum(Visits_2015to2016), len(RepeatGroups_Instances_2014to2016)],
    y=['2014 - 2015 \n Number of Visits', '2015 - 2016 \n Number of Visits', 'Visits from Repeat Groups'],
    marker=dict(
        color = 'rgba(245,24,86,0.6)',
        line = dict(
            color='rgba(245,24,86,1.0)',
            width=0.5,
        ),
    ),
    name = 'Number of Group Visits',
    orientation='h'
)

trace3 = go.Bar(
    x= [len(RepeatGroups_Instances_grouped),len(Visits_2015to2016), len(Visits_2014to2015)],
    y= ['Repeat Groups \n From Both Years', '# of Groups 2015-2016', '# of Groups 2014-2015'],
    marker=dict(
        color = 'rgba(255,200,87,0.6)',
        line = dict(
            color='rgba(255,200,87,1.0)',
            width=0.5,
        ),
    ),
    name = 'Distinct Groups (Accounts)',
    orientation='h'
)

layout = dict(
    title="Comparing the 2015-2016 Ski Season to 2014-2015",
    yaxis1 = dict(
        showgrid=False,
        showline=True,
        showticklabels=True,
        #domain=[.6, 0.69],
    ),
    yaxis2 = dict(
        showgrid=False,
        showline=True,
        showticklabels=True,
        #domain=[0.7, 0.79],
    ),
    yaxis3 = dict(
        showgrid=False,
        showline=True,
        showticklabels=True,
        #domain=[0.8, 0.89],
    ),
    yaxis4 = dict(
        showgrid=False,
        showline=True,
        showticklabels=True,
        #domain=[0.9, 1],
    ),
    xaxis1=dict(
        zeroline=False,
        showline=False,
        showticklabels=False,
        showgrid=True,
        domain=[0,0.85]
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=False,
        showgrid=True,
        domain=[0,0.85]
    ),
    xaxis3=dict(
        zeroline=False,
        showline=False,
        showticklabels=False,
        showgrid=True,
        domain=[0,0.85]
    ),
    xaxis4=dict(
        zeroline=False,
        showline=False,
        showticklabels=False,
        showgrid=True,
        domain=[0,0.85]
    ),
    legend=dict(
        x=0.85,
        y=0.5,
        font=dict(
            size=12,
        ),
    ),
    margin=dict(
        l=175,
        r=150,
        t=70,
        b=20,
    ),
    width=800,
    height=500,
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248, 248, 255)',
)

annotations = []

y_skierdays = np.rint([np.nansum(Skierdays_2014to2015),np.nansum(Skierdays_2015to2016)])
skier_labels = ['2014 - 2015 \n Skier Days','2015 - 2016 \n Skier Days']
y_revenue = np.round([np.nansum(Revenue_2014to2015),np.nansum(Revenue_2015to2016)], decimals=2)
rev_labels = ['2014 - 2015 \n Revenue','2015 - 2016 \n Revenue']
y_visits = [np.nansum(Visits_2014to2015),np.nansum(Visits_2015to2016), len(RepeatGroups_Instances_2014to2016_df)]
vis_labels = ['2014 - 2015 \n Number of Visits', '2015 - 2016 \n Number of Visits', 'Visits from Repeat Groups']
y_repeats = [len(RepeatGroups_Instances_grouped),len(Visits_2015to2016), len(Visits_2014to2015)]
rep_labels = ['Repeat Groups \n From Both Years', '# of Groups 2015-2016', '# of Groups 2014-2015']

#for yski, xskid, yrev, xrevd, yvis, xvisd, yrep, xrepd in zip(
   # y_skierdays, skier_labels, y_revenue, rev_labels,
    #y_visits, vis_labels,y_repeats, rep_labels):

for yski, xskid in zip(y_skierdays, skier_labels):
    annotations.append(dict(xref='x1', yref='y1', y = xskid, x = yski,
                            xanchor = 'right',
                            text='{:,}'.format(yski) + ' Skiers Days',
                            font = dict(family='Arial',size=12,
                                       color= 'rgba(0,0,0,1.0)'),
                            showarrow=False,))
    
for yrev, xrevd in zip(y_revenue, rev_labels):    
    annotations.append(dict(xref='x2', yref='y2', y = xrevd, x = yrev,
                            xanchor = 'right',
                            text= '$'+'{:,}'.format(yrev),
                            font = dict(family='Arial',size=12,
                                       color= 'rgba(0,0,0,1.0)'),
                            showarrow=False,))

for yvis, xvisd in zip(y_visits, vis_labels):
    annotations.append(dict(xref='x3', yref='y3', y = xvisd, x = yvis,
                            xanchor = 'right',
                            text='{:,}'.format(yvis) + ' Visits',
                            font = dict(family='Arial',size=12,
                                       color= 'rgba(0,0,0,1.0)'),
                            showarrow=False,))

    
    
    
for yrep, xrepd in zip(y_repeats, rep_labels):    
    annotations.append(dict(xref='x4', yref='y4', y = xrepd , x = yrep,
                            xanchor = 'right',
                            text=str(yrep) + ' Distinct Groups',
                            font = dict(family='Arial',size=12,
                                       color= 'rgba(0,0,0,1.0)'),
                            showarrow=False,))
    
layout['annotations'] = annotations

fig = tools.make_subplots(rows=4, cols=1, specs=[[{}],[{}],[{}],[{}]], shared_xaxes=False,
                         shared_yaxes=True)

fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 2, 1)
fig.append_trace(trace2, 3, 1)
fig.append_trace(trace3, 4, 1)

fig['layout'].update(layout)
fig['layout'].update(
    barmode = 'group',
    bargroupgap=0,
    bargap=0.3,
    autosize = True
)
TwoYearComparison = py.plot(fig, filename='monarch-groupsales-twoyear-comparison')
TwoYearComparison
py.iplot(fig, filename='monarch-groupsales-twoyear-comparison')

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]
[ (3,1) x3,y3 ]
[ (4,1) x4,y4 ]




Looks like you used a newline character: '\n'.

Plotly uses a subset of HTML escape characters
to do things like newline (<br>), bold (<b></b>),
italics (<i></i>), etc. Your newline characters 
have been converted to '<br>' so they will show 
up right on your Plotly figure!



In [None]:
Skierdays_2014to2015
Skierdays_2015to2016


In [357]:
revenue_groupcount2016 = clean_opps_2015to2016['Revenue'].count()
revenue_gross_sum2016 = clean_opps_2015to2016['Revenue'].sum()
revenue_mean2016 = clean_opps_2015to2016['Revenue'].mean()
revenue_median2016 = clean_opps_2015to2016['Revenue'].median()
revenue_min2016 = clean_opps_2015to2016['Revenue'].min()
revenue_max2016 = clean_opps_2015to2016['Revenue'].max()
revenue_std2016 = clean_opps_2015to2016['Revenue'].std()
revenue_skew2016 = clean_opps_2015to2016['Revenue'].skew()
revenue_kurt2016 = clean_opps_2015to2016['Revenue'].kurt()
revenue_25qt2016 = clean_opps_2015to2016['Revenue'].quantile(.25)
revenue_50qt2016 = clean_opps_2015to2016['Revenue'].quantile(.50)
revenue_75qt2016 = clean_opps_2015to2016['Revenue'].quantile(.75)

revenue_groupcount2015 = clean_opps_2014to2015['Revenue'].count()
revenue_gross_sum2015 = clean_opps_2014to2015['Revenue'].sum()
revenue_mean2015 = clean_opps_2014to2015['Revenue'].mean()
revenue_median2015 = clean_opps_2014to2015['Revenue'].median()
revenue_min2015 = clean_opps_2014to2015['Revenue'].min()
revenue_max2015 = clean_opps_2014to2015['Revenue'].max()
revenue_std2015 = clean_opps_2014to2015['Revenue'].std()
revenue_skew2015 = clean_opps_2014to2015['Revenue'].skew()
revenue_kurt2015 = clean_opps_2014to2015['Revenue'].kurt()
revenue_25qt2015 = clean_opps_2014to2015['Revenue'].quantile(.25)
revenue_50qt2015 = clean_opps_2014to2015['Revenue'].quantile(.50)
revenue_75qt2015 = clean_opps_2014to2015['Revenue'].quantile(.75)


revenue_sumstats_matrix = [['Group Sales Revenue', ' 2014-2015', '2015-2016'],
                          ['Group Count', revenue_groupcount2015, revenue_groupcount2016],
                          ['Total Revenue', revenue_gross_sum2015, revenue_gross_sum2016],
                          ['Mean Group Sale', revenue_mean2015,revenue_mean2016],
                          ['Median Group Sale', revenue_median2015,revenue_median2016],
                          ['Smallest Group Sale', revenue_min2015,revenue_min2016],
                          ['Largest Group Sale', revenue_max2015, revenue_max2016],
                          ['Revenue Standard Deviation', revenue_std2015, revenue_std2016],
                          ['Revenue Skew Factor', revenue_skew2015, revenue_skew2016],
                          ['Revenue Kurtosis Factor', revenue_kurt2015, revenue_kurt2016], 
                          ['Revenue 25th Quartile', revenue_25qt2015, revenue_25qt2016],
                          ['Revenue 50th Quartile', revenue_50qt2015, revenue_50qt2016],
                          ['Revenue 75th Quartile', revenue_75qt2015, revenue_75qt2016]]

revenue_table = tools.FigureFactory.create_table(revenue_sumstats_matrix, height_constant=60)

revenue_table.layout.update({'height' : 500, 'width': 500})

revenue_sumstats_table_plot_url = py.plot(revenue_table, filename='Revenue2015to2016-SummaryStats')
revenue_sumstats_table_plot_url
py.iplot(revenue_table, filename='Revenue2015to2016-SummaryStats')

In [367]:

cf.set_config_file(offline=False, world_readable=True, theme='pearl')

revenue_graph = pd.DataFrame(columns= ['data2015','data2016'])
revenue_graph['data2015'] = clean_opps_2014to2015['Revenue']
revenue_graph['data2016'] = clean_opps_2015to2016['Revenue']

trace2015 = go.Histogram(
        x = revenue_graph['data2015'].as_matrix(),
        opacity = 0.75,
        name = '2014-2015 Group Revenues'
        )

trace2016 = go.Histogram(
        x = revenue_graph['data2016'].as_matrix(),
        opacity = 0.70,
        name = '2015-2016 Group Revenues'
        )

data = [trace2015, trace2016]
layout = go.Layout(
    title = 'Distribution of Group Revenues',
    barmode='overlay')

fig = go.Figure(data=data, layout=layout)

revenue_histograms_plot_url = py.plot(fig, filename='cuflinks/revenue-histograms')
revenue_histograms_plot_url
py.iplot(fig, filename='cuflinks/revenue-histograms')

In [365]:
skierdays_groupcount2016 = clean_opps_2015to2016['SkierDays'].count()
skierdays_gross_sum2016 = clean_opps_2015to2016['SkierDays'].sum()
skierdays_mean2016 = clean_opps_2015to2016['SkierDays'].mean()
skierdays_median2016 = clean_opps_2015to2016['SkierDays'].median()
skierdays_min2016 = clean_opps_2015to2016['SkierDays'].min()
skierdays_max2016 = clean_opps_2015to2016['SkierDays'].max()
skierdays_std2016 = clean_opps_2015to2016['SkierDays'].std()
skierdays_skew2016 = clean_opps_2015to2016['SkierDays'].skew()
skierdays_kurt2016 = clean_opps_2015to2016['SkierDays'].kurt()
skierdays_25qt2016 = clean_opps_2015to2016['SkierDays'].quantile(.25)
skierdays_50qt2016 = clean_opps_2015to2016['SkierDays'].quantile(.50)
skierdays_75qt2016 = clean_opps_2015to2016['SkierDays'].quantile(.75)

skierdays_groupcount2015 = clean_opps_2014to2015['SkierDays'].count()
skierdays_gross_sum2015 = clean_opps_2014to2015['SkierDays'].sum()
skierdays_mean2015 = clean_opps_2014to2015['SkierDays'].mean()
skierdays_median2015 = clean_opps_2014to2015['SkierDays'].median()
skierdays_min2015 = clean_opps_2014to2015['SkierDays'].min()
skierdays_max2015 = clean_opps_2014to2015['SkierDays'].max()
skierdays_std2015 = clean_opps_2014to2015['SkierDays'].std()
skierdays_skew2015 = clean_opps_2014to2015['SkierDays'].skew()
skierdays_kurt2015 = clean_opps_2014to2015['SkierDays'].kurt()
skierdays_25qt2015 = clean_opps_2014to2015['SkierDays'].quantile(.25)
skierdays_50qt2015 = clean_opps_2014to2015['SkierDays'].quantile(.50)
skierdays_75qt2015 = clean_opps_2014to2015['SkierDays'].quantile(.75)


skierdays_sumstats_matrix = [['Group Sales Skier Days', ' 2014-2015', '2015-2016'],
                          ['Group Count', skierdays_groupcount2015, skierdays_groupcount2016],
                          ['Total Skier Days', skierdays_gross_sum2015, skierdays_gross_sum2016],
                          ['Mean Group Skier Days', skierdays_mean2015,skierdays_mean2016],
                          ['Median Group Skier Days', skierdays_median2015,skierdays_median2016],
                          ['Shortest Group Skier Days', skierdays_min2015,skierdays_min2016],
                          ['Longest Group Skier Days', skierdays_max2015, skierdays_max2016],
                          ['Skier Days Standard Deviation', skierdays_std2015, skierdays_std2016],
                          ['Skier Days Skew Factor', skierdays_skew2015, skierdays_skew2016],
                          ['Skier Days Kurtosis Factor', skierdays_kurt2015, skierdays_kurt2016], 
                          ['Skier Days 25th Quartile', skierdays_25qt2015, skierdays_25qt2016],
                          ['Skier Days 50th Quartile', skierdays_50qt2015, skierdays_50qt2016],
                          ['Skier Days 75th Quartile', skierdays_75qt2015, skierdays_75qt2016]]

skierdays_table = tools.FigureFactory.create_table(skierdays_sumstats_matrix, height_constant=60)

skierdays_table.layout.update({'height' : 500, 'width': 500})

skierdays_sumstats_table_plot_url = py.plot(skierdays_table, filename='SkierDays2015to2016-SummaryStats')
skierdays_sumstats_table_plot_url
py.iplot(skierdays_table, filename='SkierDays2015to2016-SummaryStats')

In [368]:
skierdays_graph = pd.DataFrame(columns= ['data2015','data2016'])
skierdays_graph['data2015'] = clean_opps_2014to2015['SkierDays']
skierdays_graph['data2016'] = clean_opps_2015to2016['SkierDays']

trace2015 = go.Histogram(
        x = skierdays_graph['data2015'].as_matrix(),
        opacity = 0.75,
        name = '2014-2015 Group Skier Days'
        )

trace2016 = go.Histogram(
        x = skierdays_graph['data2016'].as_matrix(),
        opacity = 0.70,
        name = '2015-2016 Group Skier Days'
        )

data = [trace2015, trace2016]
layout = go.Layout(
    title = 'Distribution of Group Skier Days',
    barmode='overlay')

fig = go.Figure(data=data, layout=layout)

skierdays_histograms_plot_url = py.iplot(fig, filename='cufflinks/skierdays-histograms')
skierdays_histograms_plot_url
py.iplot(fig, filename='cufflinks/skierdays-histograms')

In [375]:
duration_groupcount2016 = clean_opps_2015to2016['Duration'].count()
duration_gross_sum2016 = clean_opps_2015to2016['Duration'].sum()
duration_mean2016 = clean_opps_2015to2016['Duration'].mean()
duration_median2016 = clean_opps_2015to2016['Duration'].median()
duration_min2016 = clean_opps_2015to2016['Duration'].min()
duration_max2016 = clean_opps_2015to2016['Duration'].max()
duration_std2016 = clean_opps_2015to2016['Duration'].std()
duration_skew2016 = clean_opps_2015to2016['Duration'].skew()
duration_kurt2016 = clean_opps_2015to2016['Duration'].kurt()
duration_25qt2016 = clean_opps_2015to2016['Duration'].quantile(.25)
duration_50qt2016 = clean_opps_2015to2016['Duration'].quantile(.50)
duration_75qt2016 = clean_opps_2015to2016['Duration'].quantile(.75)

duration_groupcount2015 = clean_opps_2014to2015['Duration'].count()
duration_gross_sum2015 = clean_opps_2014to2015['Duration'].sum()
duration_mean2015 = clean_opps_2014to2015['Duration'].mean()
duration_median2015 = clean_opps_2014to2015['Duration'].median()
duration_min2015 = clean_opps_2014to2015['Duration'].min()
duration_max2015 = clean_opps_2014to2015['Duration'].max()
duration_std2015 = clean_opps_2014to2015['Duration'].std()
duration_skew2015 = clean_opps_2014to2015['Duration'].skew()
duration_kurt2015 = clean_opps_2014to2015['Duration'].kurt()
duration_25qt2015 = clean_opps_2014to2015['Duration'].quantile(.25)
duration_50qt2015 = clean_opps_2014to2015['Duration'].quantile(.50)
duration_75qt2015 = clean_opps_2014to2015['Duration'].quantile(.75)


duration_sumstats_matrix = [['Group Sales Visit Duration', ' 2014-2015', '2015-2016'],
                          ['Group Count', duration_groupcount2015, duration_groupcount2016],
                          ['Total Visit Days', duration_gross_sum2015, duration_gross_sum2016],
                          ['Mean Group Visit Duration', duration_mean2015,duration_mean2016],
                          ['Median Group Skier Duration', duration_median2015,duration_median2016],
                          ['Shortest Group Visit Duration', duration_min2015,duration_min2016],
                          ['Longest Group Visit Duration', duration_max2015, duration_max2016],
                          ['Visit Duration Standard Deviation', duration_std2015, duration_std2016],
                          ['Visit Duration Skew Factor', duration_skew2015, duration_skew2016],
                          ['Visit Duration Kurtosis Factor', duration_kurt2015, duration_kurt2016], 
                          ['Visit Duration 25th Quartile', duration_25qt2015, duration_25qt2016],
                          ['Visit Duration 50th Quartile', duration_50qt2015, duration_50qt2016],
                          ['Visit Duration 75th Quartile', duration_75qt2015, duration_75qt2016]]

duration_table = tools.FigureFactory.create_table(duration_sumstats_matrix, height_constant=60)

duration_table.layout.update({'height' : 500, 'width': 650})

duration_sumstats_table_plot_url = py.plot(duration_table, filename='Duration2015to2016-SummaryStats')
duration_sumstats_table_plot_url
py.iplot(duration_table, filename='Duration2015to2016-SummaryStats')

In [380]:
duration_graph = pd.DataFrame(columns= ['data2015','data2016'])
duration_graph['data2015'] = clean_opps_2014to2015['Duration']
duration_graph['data2016'] = clean_opps_2015to2016['Duration']

trace2015 = go.Histogram(
        x = duration_graph['data2015'].as_matrix(),
        opacity = 0.75,
        name = '2014-2015 Visit Durations'
        )

trace2016 = go.Histogram(
        x = duration_graph['data2016'].as_matrix(),
        opacity = 0.70,
        name = '2015-2016 Visit Durations'
        )

data = [trace2015, trace2016]
layout = go.Layout(
    title = 'Distribution of Group Visit Durations',
    barmode='overlay')

fig = go.Figure(data=data, layout=layout)

duration_histograms_plot_url = py.iplot(fig, filename='cufflinks/duration-histograms')
duration_histograms_plot_url
py.iplot(fig, filename='cufflinks/duration-histograms')

In [384]:
limitnames = [('$5,000','$12,000'),('$2,234','$4,999'),('$1,301','$2,234'), ('$717','$1,300'), ('$100','$716')]
limits = [(0,10),(11,71),(72,142),(143,214),(215,285)]
colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)",'rgb(50,171,96)']
opportunities = []
scale = 100

for i in range(len(limits)):
    lim = limits[i]
    geo_info_sub = geo_info[lim[0]:lim[1]]
    opportunity = dict(
        type = 'scattergeo',
        locationmode = 'USA-states',
        lon = geo_info_sub['longitude'],
        lat = geo_info_sub['latitude'],
        text = geo_info_sub['text'],
        marker = dict(
            size = geo_info_sub['Revenue']/10,
            #sizeref = geo_info['Revenue'].max() / 1000,
            color = colors[i],
            line = dict(width=0.5, color = 'rgb(40,40,40)'),
            sizemode = 'area'
        ),
        name = '$' + '{0} - {1}'.format(limitnames[i][0],limitnames[i][1])
        #$'+ '{:,}'.format(revs)
        #namedict = {0:5000,10:12000,11:2234,71:4999,72:1301,142:2234,143:717,214:1300,215:100,285:716}
    )
    opportunities.append(opportunity)

layout = dict(
    title = '2014-2015 Group Sales Opportunties by Revenue<br>(Click legend to toggle traces)',
    showlegend = True,
    geo = dict(
        scope = 'usa',
        projection=dict(type='albers usa'),
        showland = True,
        landcolor = 'rgb(217,217,217)',
        subunitwidth=1,
        countrywidth=1,
        subunitcolor="rgb(255,255,255)",
        countrycolor="rgb(255,255,255)"
    )
)

fig = dict(data=opportunities, layout = layout)
map_url = py.iplot(fig, validate=False, filename='2014-2015-groupsales-map')
map_url

u'https://plot.ly/~AAbercrombie0492/51'

In [385]:
GS_2015to2016.head()

Unnamed: 0,ArrivalDate,AccountName,OpportunityName,GroupLeader,Revenue,SkierCount,SkierDays,VisitCount_AllTime,Industry,Transportation,LeaderStreet,LeaderCity,LeaderZipcode,LeaderState,GoogleSearchAddress,latitude,longitude,VisitCount_2015to2016
0,2016-03-18,"Abundant Life Fellowship - Leoti, KS","Abundant Life Fellowship Leoti, KS March 2016",Dustin Fritzmeier,2321.0,29,49.0,1,Church,Bus,PO Box 356,Leoti,67861.0,KS,"Poncha Springs, CO 81242, USA",38.510995,-106.080184,1
1,2016-02-26,Academy Online High School (AOHS) - Colorado S...,AOHS The Village February 2016,Nathan Gorsch,,17,,1,Colorado School,Bus,1110 Chapel Hills Dr,Colorado Springs,80924.0,CO,"Buena Vista, CO 81211, USA",38.842218,-106.131129,1
2,2016-01-12,Achgill International Students - Bryan,"Oasis Bryan, TX January 2016",,2180.0,11,18.0,2,Higher Education,Cars,,,,,"3135 Dragonfly Ct, Castle Rock, CO 80109, USA",39.39578,-104.901127,1
3,2016-03-13,Adams State University (ASU) - Alamosa,ASU ASAP Alamosa March 2016,Seth Clock,304.0,10,9.0,3,Business,Vans,208 Edgemont Blvd,Alamosa,81101.0,CO,"1835 S 129th E Ave, Tulsa, OK 74108, USA",36.136163,-95.832535,3
4,2016-02-07,Adams State University (ASU) - Alamosa,ASU ASAP Alamosa February 2016,Seth Clock,152.0,10,4.0,3,Business,Vans,208 Edgemont Blvd,Alamosa,81101.0,CO,"740 W Caramillo St, Colorado Springs, CO 80907...",38.857245,-104.837553,3
