In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv("mpg.csv")

In [3]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [11]:
df.groupby('mpg').mean(numeric_only=True)

Unnamed: 0_level_0,cylinders,displacement,weight,acceleration,model_year,origin
mpg,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9.0,8.0,304.00,4732.00,18.500000,70.0,1.0
10.0,8.0,333.50,4495.50,14.500000,70.0,1.0
11.0,8.0,374.25,4419.00,12.375000,72.0,1.0
12.0,8.0,394.50,4786.50,12.083333,72.5,1.0
13.0,8.0,353.00,4254.45,12.935000,73.3,1.0
...,...,...,...,...,...,...
43.4,4.0,90.00,2335.00,23.700000,80.0,2.0
44.0,4.0,97.00,2130.00,24.600000,82.0,2.0
44.3,4.0,90.00,2085.00,21.700000,80.0,2.0
44.6,4.0,91.00,1850.00,13.800000,80.0,3.0


In [12]:
avg_year = df.groupby('model_year').mean()

TypeError: agg function failed [how->mean,dtype->object]

The error you're encountering happens because Pandas is trying to compute the mean of non-numeric data. Specifically, it seems like one or more columns in your DataFrame contain strings or other non-numeric values, and Pandas can't compute the mean for those columns.

To resolve this issue, you can use the numeric_only=True argument to tell Pandas to compute the mean only on numeric columns. Here’s how you can modify your code:

python

```avg_year = df.groupby('model_year').mean(numeric_only=True)``` 

This will ignore any non-numeric columns and compute the mean only for the numeric ones.

Alternatively, you can filter your DataFrame to include only numeric columns before applying the groupby:

python

```numeric_columns = df.select_dtypes(include='number').columns``` \
```avg_year = df.groupby('model_year')[numeric_columns].mean()```

In [213]:
import numpy as np
import pandas as pd

In [214]:
data_one = {'A': ['A0', 'A1', 'A2', 'A3'],'B': ['B0', 'B1', 'B2', 'B3']}

In [215]:
data_two = {'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}

In [216]:
one = pd.DataFrame(data_one)

In [217]:
two = pd.DataFrame(data_two)

In [218]:
one

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [219]:
two

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [220]:
axis0 = pd.concat([one,two],axis=0)

In [221]:
axis0

Unnamed: 0,A,B,C,D
0,A0,B0,,
1,A1,B1,,
2,A2,B2,,
3,A3,B3,,
0,,,C0,D0
1,,,C1,D1
2,,,C2,D2
3,,,C3,D3


In [222]:
axis1 = pd.concat([one,two],axis=1)

In [223]:
axis1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [224]:
two.columns = one.columns

In [225]:
pd.concat([one,two])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [226]:
registrations = pd.DataFrame({'reg_id':[1,2,3,4],'name':['Andrew','Bobo','Claire','David']})
logins = pd.DataFrame({'log_id':[1,2,3,4],'name':['Xavier','Andrew','Yolanda','Bobo']})

In [227]:
registrations

Unnamed: 0,reg_id,name
0,1,Andrew
1,2,Bobo
2,3,Claire
3,4,David


In [228]:
logins

Unnamed: 0,log_id,name
0,1,Xavier
1,2,Andrew
2,3,Yolanda
3,4,Bobo


-----------
----------