### Prepping Data Challenge:  Salesforce Standard Connections for Dreamforce22 (week 38)

### Requirements
- Load the datasets
- Recreate the Opportunities Standard Connection and only include the fields listed above
- Output the data
- Now we want to create datasets which will easily answer some questions:

    - Who is the Opportunity Owner with the Highest Amount?
    - Who is the Account Owner with the Highest Amount?
    - Which Account has the most Opportunities & Amount?
- Create an output for each question

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the datasets, reading only the columns each later step needs.

# Opportunities: the generic Id/Name columns get opportunity-specific labels
# so they cannot be confused with the account columns after the join.
dfo = pd.read_csv(
    r"\Dataprep\2022\Opportunity.csv",
    usecols=['Id', 'Name', 'AccountId', 'StageName', 'Amount', 'OwnerId', 'CreatedById'],
).rename(columns={'Id': 'Opportunity ID', 'Name': 'Opportunity Name'})

# Accounts: every column is prefixed with "Account". Id-style columns are
# joined without a space (AccountId matches the opportunity join key),
# descriptive columns with one.
dfa = pd.read_csv(
    r"\Dataprep\2022\Account.csv",
    usecols=['Id', 'Name', 'Type', 'CreatedById', 'OwnerId'],
).rename(columns={
    'Id': 'AccountId',
    'Name': 'Account Name',
    'Type': 'Account Type',
    'CreatedById': 'AccountCreatedById',
    'OwnerId': 'AccountOwnerId',
})

# Users: only the Id -> Name pairs are needed.
dfu = pd.read_csv(r"\Dataprep\2022\User.csv", usecols=['Id', 'Name'])

In [3]:
# Preview the first five opportunity rows.
dfo.head(n=5)

Unnamed: 0,Opportunity ID,AccountId,Opportunity Name,StageName,Amount,OwnerId,CreatedById
0,0068d000009MWGPAA4,0018d00000Ggx86AAB,Opportunity for Harper529,Closed Won,17621,0058d000004QHaiAAG,0058d000004B2qYAAS
1,0068d000009MWGQAA4,0018d00000Ggwy8AAB,Opportunity for Duncan873,Closed Won,2400250,0058d000004QHapAAG,0058d000004B2qYAAS
2,0068d000009MWGRAA4,0018d00000GgwzDAAR,Opportunity for Daniel875,Closed Won,5023650,0058d000004QHadAAG,0058d000004B2qYAAS
3,0068d000009MWGTAA4,0018d00000GgwugAAB,Opportunity for McKenzie881,Closed Won,1684050,0058d000004QHacAAG,0058d000004B2qYAAS
4,0068d000009MWGUAA4,0018d00000Ggx3VAAR,Opportunity for Duncan885,Qualification,413101,0058d000004QHapAAG,0058d000004B2qYAAS


In [4]:
# Preview the first five account rows.
dfa.head(n=5)

Unnamed: 0,AccountId,Account Name,Account Type,AccountOwnerId,AccountCreatedById
0,0018d00000GMjqpAAD,Sample Account for Entitlements,,0058d000004QHaCAAW,0058d000004QHaCAAW
1,0018d00000GgwutAAB,Flores188 Inc,Customer,0058d000004QHagAAG,0058d000004B2qYAAS
2,0018d00000GgwuuAAB,Hoffman387 Inc,Partner,0058d000004QHaaAAG,0058d000004B2qYAAS
3,0018d00000GgwuvAAB,Ross388 Inc,Customer,0058d000004QHadAAG,0058d000004B2qYAAS
4,0018d00000GgwuwAAB,Fleming389 Inc,Customer,0058d000004QHaqAAG,0058d000004B2qYAAS


In [5]:
# Preview the first five user rows.
dfu.head(n=5)

Unnamed: 0,Id,Name
0,0058d000004B2qYAAS,Lorna Brown
1,0058d000004QHaBAAW,Integration User
2,0058d000004QHaCAAW,Automated Process
3,0058d000004QHaDAAW,Security User
4,0058d000004QHaFAAW,Data.com Clean


In [6]:
# Join account info to the opportunity info.
# validate='many_to_one' makes the merge raise a MergeError if an AccountId
# occurs more than once in dfa; without it a duplicated account would
# silently duplicate opportunity rows and inflate every Amount total below.
# NOTE: this cell reassigns dfo, so re-running it without first re-running the
# load cell merges the account columns a second time (producing _x/_y
# suffixed duplicates).
dfo = dfo.merge(dfa, on='AccountId', how='left', validate='many_to_one')

# add the user info
users = dict(zip(dfu['Id'], dfu['Name']))

# New name column -> id column it is looked up from. Insertion order is the
# order in which the name columns are appended to dfo.
name_lookups = {
    'Created By Name': 'CreatedById',
    'Owner Name': 'OwnerId',
    'Account Created By Name': 'AccountCreatedById',
    'Account Owner Name': 'AccountOwnerId',
}
for name_col, id_col in name_lookups.items():
    # replace() swaps each user Id for its Name; an Id with no entry in
    # `users` is left unchanged, so it shows up as a raw Id in the name column.
    dfo[name_col] = dfo[id_col].replace(users)

# The account-side user ids were only needed for the lookups above.
dfo = dfo.drop(columns=['AccountCreatedById', 'AccountOwnerId'])

In [7]:
# Preview the first five rows of the joined opportunity data.
dfo.head(n=5)

Unnamed: 0,Opportunity ID,AccountId,Opportunity Name,StageName,Amount,OwnerId,CreatedById,Account Name,Account Type,Created By Name,Owner Name,Account Created By Name,Account Owner Name
0,0068d000009MWGPAA4,0018d00000Ggx86AAB,Opportunity for Harper529,Closed Won,17621,0058d000004QHaiAAG,0058d000004B2qYAAS,Bowman347 Inc,Customer,Lorna Brown,Dennis Howard,Lorna Brown,Irene McCoy
1,0068d000009MWGQAA4,0018d00000Ggwy8AAB,Opportunity for Duncan873,Closed Won,2400250,0058d000004QHapAAG,0058d000004B2qYAAS,Cox450 Inc,Customer,Lorna Brown,Eric Sanchez,Lorna Brown,Irene McCoy
2,0068d000009MWGRAA4,0018d00000GgwzDAAR,Opportunity for Daniel875,Closed Won,5023650,0058d000004QHadAAG,0058d000004B2qYAAS,Stevenson954 Inc,Customer,Lorna Brown,Eric Gutierrez,Lorna Brown,Kelly Frazier
3,0068d000009MWGTAA4,0018d00000GgwugAAB,Opportunity for McKenzie881,Closed Won,1684050,0058d000004QHacAAG,0058d000004B2qYAAS,Christensen164 Inc,Customer,Lorna Brown,Irene Kelley,Lorna Brown,Harold Campbell
4,0068d000009MWGUAA4,0018d00000Ggx3VAAR,Opportunity for Duncan885,Qualification,413101,0058d000004QHapAAG,0058d000004B2qYAAS,Murphy861 Inc,Partner,Lorna Brown,Eric Sanchez,Lorna Brown,Julie Chavez


In [8]:
# Output the data: write the joined opportunities frame to CSV, leaving out
# the positional row index.
dfo.to_csv(path_or_buf='wk38-output.csv', index=False)

In [9]:
def total_amount_by(df, key):
    """Sum ``Amount`` per distinct value of ``key``, largest total first.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``Amount`` column and the ``key`` column.
    key : str
        Name of the column to group by.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``key`` and ``Amount``, sorted by ``Amount`` descending
        so the first row answers "who has the highest amount?".
    """
    return (df.groupby(key, as_index=False)['Amount'].sum()
              .sort_values(by='Amount', ascending=False))


# 1 - who is the opportunity owner with the highest amount?
(total_amount_by(dfo, 'Owner Name')
     .to_csv('wk38-opportunity-owner-with-highest-amount.csv', index=False))


# 2 - who is the account owner with the highest amount?
(total_amount_by(dfo, 'Account Owner Name')
     .to_csv('wk38-account-owner-with-highest-amount.csv', index=False))


# 3 - which account has the most opportunities & amount?
# Named aggregations need identifier-style names, hence the underscore that is
# turned back into a space afterwards. The result is sorted descending (by
# opportunity count, then by amount) so that, like outputs 1 and 2, the top
# row answers the question.
(dfo.groupby('Account Name', as_index=False)
    .agg(Number_of_Opportunities=('Opportunity ID', 'count'),
         Amount=('Amount', 'sum'))
    .rename(columns=lambda c: c.replace('_', ' '))
    .sort_values(by=['Number of Opportunities', 'Amount'], ascending=False)
    .to_csv('wk38-account-output.csv', index=False))