In [5]:
import pandas as pd

# Open the workbook once; the `xls` handle is reused by later cells to read
# further sheets, so it is intentionally left open here.
file_path = 'automotive_data.xlsx'
xls = pd.ExcelFile(file_path)

# Read the two sheets this cell needs.
sales_df = pd.read_excel(xls, sheet_name='Sales')
salesmen_df = pd.read_excel(xls, sheet_name='Salesmen')

# Per-salesman aggregates. Note that `total_sales` is the summed sale_price
# (i.e. revenue), not a count of transactions.
sales_performance = sales_df.groupby('salesman_id').agg(
    total_sales=('sale_price', 'sum'),
    average_sale_price=('sale_price', 'mean'),
    total_profit=('profit', 'sum')
).reset_index()

# Attach the salesman's name.
# - how='left' keeps every aggregated salesman even when the Salesmen sheet has
#   no matching row (name becomes NaN) instead of silently dropping their totals.
# - validate='one_to_one' raises pandas.errors.MergeError if salesman_id is
#   duplicated on either side, which would otherwise duplicate result rows.
sales_performance = sales_performance.merge(
    salesmen_df[['salesman_id', 'name']],
    on='salesman_id',
    how='left',
    validate='one_to_one',
)

# Display the results
print(sales_performance)


    salesman_id  total_sales  average_sale_price  total_profit  \
0             1   1541726.95        21714.464085     338314.80   
1             2   1656742.75        20709.284375     314992.05   
2             3   1740834.70        20009.594253     351331.30   
3             4   1622801.55        17832.984066     193920.70   
4             5   1243435.25        20055.407258     230624.50   
5             6   1707740.25        19188.092697     314182.25   
6             7   1871077.50        22012.676471     311992.15   
7             8   1731896.50        19680.642045     317687.95   
8             9   1807799.50        20086.661111     413378.05   
9            10   1706316.20        21598.939241     300078.60   
10           11   1703005.75        19134.896067     376218.45   
11           12   1913832.55        21503.736517     370746.35   

                name  
0         John Smith  
1       Alex Johnson  
2   Michael Williams  
3        Sarah Jones  
4      Jessica Brown  
5  

In [6]:
# Profitability per (make, model): summed profit, summed sale price and mean
# sale price, ordered from the most to the least profitable combination.
# The sort keeps the pre-sort row labels, so the printed index is not 0..n-1.
profitability_analysis = (
    sales_df
    .groupby(['make', 'model'], as_index=False)
    .agg(
        total_profit=('profit', 'sum'),
        total_sales=('sale_price', 'sum'),
        average_sale_price=('sale_price', 'mean'),
    )
    .sort_values(by='total_profit', ascending=False)
)

# Show the ranked table
print(profitability_analysis)

             make       model  total_profit  total_sales  average_sale_price
64          Tesla     Model 3     168147.00   1016071.00        35036.931034
53  Mercedes-Benz     E-Class      85779.20    420919.65        19132.711364
23           Ford       Focus      84938.35    219583.70        14638.913333
58         Nissan       Rogue      84016.75    352961.25        23530.750000
0           Acura         ILX      81333.00    478317.00        26573.166667
..            ...         ...           ...          ...                 ...
2           Acura         RDX      13385.00    265128.75        24102.613636
33       Infiniti        QX60      13149.00    566384.00        35399.000000
65          Tesla     Model S       7197.00    254266.00        23115.090909
35       Infiniti        QX80        489.00    272079.00        30231.000000
57         Nissan  Pathfinder     -19721.70    158046.50        14367.863636

[79 rows x 5 columns]


In [7]:
# Re-read both sheets from the already-open workbook handle.
sales_df = pd.read_excel(xls, sheet_name='Sales')
customers_df = pd.read_excel(xls, sheet_name='Customers')

# Join each sale to its customer record on the shared customer_id key
# (default inner join: sales without a matching customer row are excluded).
customer_sales_analysis = sales_df.merge(customers_df, on='customer_id')

# Per (make, model): mean buyer credit score and annual income alongside the
# summed sale price and profit, ordered by summed sale price, largest first.
customer_insights = (
    customer_sales_analysis
    .groupby(['make', 'model'])
    .agg(
        average_credit_score=('credit_score', 'mean'),
        average_annual_income=('annual_income', 'mean'),
        total_sales=('sale_price', 'sum'),
        total_profit=('profit', 'sum'),
    )
    .reset_index()
    .sort_values(by='total_sales', ascending=False)
)

# Show the table
print(customer_insights)

             make     model  average_credit_score  average_annual_income  \
64          Tesla   Model 3            586.241379           75363.275862   
55  Mercedes-Benz       GLE            570.692308           75434.307692   
34       Infiniti      QX70            608.923077           76735.153846   
33       Infiniti      QX60            559.187500           67755.625000   
32       Infiniti       Q50            561.166667           65156.000000   
..            ...       ...                   ...                    ...   
68         Toyota   Corolla            614.571429           80446.857143   
24          Honda    Accord            568.076923           82417.538462   
50          Mazda    Mazda3            618.000000           68354.857143   
63         Subaru   Outback            542.166667           83888.666667   
29        Hyundai  Santa Fe            560.428571           81917.857143   

    total_sales  total_profit  
64   1016071.00     168147.00  
55    675252.00      71

In [15]:
# Read the Sales and Customers sheets from the open workbook handle.
sales_df = xls.parse('Sales')
customers_df = xls.parse('Customers')

# Inner-join sales to customers on customer_id.
merged_data = sales_df.merge(customers_df, on='customer_id')

# One row per (make, model): mean buyer credit score and summed profit.
# NOTE: despite its name, this frame holds no income column.
profit_and_income_data = (
    merged_data
    .groupby(['make', 'model'])
    .agg(
        avg_credit_score=('credit_score', 'mean'),
        total_profit=('profit', 'sum'),
    )
    .reset_index()
)

# Correlation (Series.corr default method) between the two per-model columns.
correlation = profit_and_income_data.avg_credit_score.corr(
    profit_and_income_data.total_profit
)

# Report the coefficient
print(f"Correlation between average credit score and total profit: {correlation}")



Correlation between average credit score and total profit: -0.031312581375292775


In [16]:
# Coerce 'sale_date' to datetime so the .dt accessor can be used below.
sales_df['sale_date'] = pd.to_datetime(sales_df['sale_date'])

# Add calendar year and month columns to sales_df itself (in-place column
# assignment, so these columns remain on sales_df after this cell).
sales_df['year'] = sales_df['sale_date'].dt.year
sales_df['month'] = sales_df['sale_date'].dt.month

# Number of non-null sale_id values per (year, month).
sales_trends = (
    sales_df
    .groupby(['year', 'month'])['sale_id']
    .count()
    .reset_index(name='number_of_sales')
)

# Show the monthly counts
print(sales_trends)


    year  month  number_of_sales
0   2014      1               24
1   2014      2               17
2   2014      3               18
3   2014      4               22
4   2014      5               23
5   2014      6               22
6   2014      7               20
7   2014      8               17
8   2014      9               16
9   2014     10               28
10  2014     11               17
11  2014     12               21
12  2015      1               18
13  2015      2               38
14  2015      3               30
15  2015      4               32
16  2015      5               26
17  2015      6               29
18  2015      7               26
19  2015      8               23
20  2015      9               20
21  2015     10               15
22  2015     11               30
23  2015     12               20
24  2016      1               41
25  2016      2               27
26  2016      3               42
27  2016      4               46
28  2016      5               37
29  2016  

In [18]:
# Vehicles sold per salesman per calendar year, with the salesman's name.
#
# The year is derived here from 'sale_date' (on a copy, via assign) rather than
# relying on a 'year' column having been added to sales_df beforehand; that
# column disappears whenever sales_df is re-read from the workbook, which would
# make this cell fail with a KeyError depending on execution order.
#
# The merge uses how='left' so a salesman missing from salesmen_df keeps their
# counts (name becomes NaN) instead of being dropped, and
# validate='many_to_one' raises pandas.errors.MergeError if salesman_id is
# duplicated in salesmen_df, which would otherwise duplicate result rows.
sales_by_salesman_year = (
    sales_df
    .assign(year=lambda frame: pd.to_datetime(frame['sale_date']).dt.year)
    .groupby(['salesman_id', 'year'])
    .agg(vehicles_sold=('sale_id', 'count'))
    .reset_index()
    .merge(
        salesmen_df[['salesman_id', 'name']],
        on='salesman_id',
        how='left',
        validate='many_to_one',
    )
)

# Display the result
print(sales_by_salesman_year)



    salesman_id  year  vehicles_sold              name
0             1  2014             17        John Smith
1             1  2015             24        John Smith
2             1  2016             30        John Smith
3             2  2014             19      Alex Johnson
4             2  2015             26      Alex Johnson
5             2  2016             35      Alex Johnson
6             3  2014             26  Michael Williams
7             3  2015             24  Michael Williams
8             3  2016             37  Michael Williams
9             4  2014             22       Sarah Jones
10            4  2015             30       Sarah Jones
11            4  2016             39       Sarah Jones
12            5  2014             18     Jessica Brown
13            5  2015             13     Jessica Brown
14            5  2016             31     Jessica Brown
15            6  2014             22       Chris Davis
16            6  2015             28       Chris Davis
17        

In [20]:
# Mean of the 'days_on_lot' column across all rows of sales_df
# (pandas skips missing values by default).
average_days_on_lot = sales_df['days_on_lot'].agg('mean')

# Report the mean rounded to two decimals
print(f"Average Days on Lot: {average_days_on_lot:.2f}")



Average Days on Lot: 44.65


In [21]:
# Count non-null sale_id values per customer_id.
repeat_customers = (
    sales_df
    .groupby('customer_id')['sale_id']
    .count()
    .reset_index(name='purchase_count')
)

# Keep only customers with more than one purchase (row labels are preserved).
repeat_customers = repeat_customers.loc[repeat_customers['purchase_count'] > 1]

# Attach the customer record columns via an inner join on customer_id.
repeat_customers_details = repeat_customers.merge(customers_df, on='customer_id')

# Show the repeat customers
print(repeat_customers_details)


     customer_id  purchase_count first_name  last_name  credit_score  \
0              3               2    Richard    Johnson           309   
1              8               2     Robert   Martinez           614   
2             17               2      David     Miller           399   
3             23               2     Joseph      Brown           844   
4             26               2     Joseph   Anderson           451   
..           ...             ...        ...        ...           ...   
135          812               2    Charles      Jones           341   
136          813               2    William  Rodriguez           774   
137          832               2     Daniel      Moore           585   
138          833               2    Anthony     Miller           458   
139          839               2    Matthew    Jackson           771   

     annual_income employement_status                            address  \
0            74113         Unemployed        5050 Oak St, R