# Using Pandas - Analysing NBA players

In [1]:
import pandas as pd
import numpy as np
"""Observing basic statisitcs on the data
Filtering(loc)
Adding and manipulating columns(insert(3), dropna, astype)
Sorting (nlargest)
Renaming
grooping
agg
apply lamda
write to file
"""

In [2]:
# loading the csv to memory to a df object
nba = pd.read_csv("nba.csv")

## Observing basic statisitcs on the data

In [3]:
nba.head()  # returning the first 5 rows

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [4]:
nba[['Name', 'Salary']].tail(10)  # returning the last 10 rows

Unnamed: 0,Name,Salary
448,Gordon Hayward,15409570.0
449,Rodney Hood,1348440.0
450,Joe Ingles,2050000.0
451,Chris Johnson,981348.0
452,Trey Lyles,2239800.0
453,Shelvin Mack,2433333.0
454,Raul Neto,900000.0
455,Tibor Pleiss,2900000.0
456,Jeff Withey,947276.0
457,,


In [5]:
nba.info()  # 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     373 non-null object
Salary      446 non-null float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


In [6]:
nba.columns  # returns column names (this is also how you can update the columns)

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [7]:
nba.index  # returns index name

RangeIndex(start=0, stop=458, step=1)

In [8]:
nba.describe()  # describing the statistics for numerical columns

Unnamed: 0,Number,Age,Weight,Salary
count,457.0,457.0,457.0,446.0
mean,17.678337,26.938731,221.522976,4842684.0
std,15.96609,4.404016,26.368343,5229238.0
min,0.0,19.0,161.0,30888.0
25%,5.0,24.0,200.0,1044792.0
50%,13.0,26.0,220.0,2839073.0
75%,25.0,30.0,240.0,6500000.0
max,99.0,40.0,307.0,25000000.0


Choosing columns (notice the double brackets)

In [9]:
nba[['Weight', 'Age']].mean()  # mean on the columns (same as axis=0 or axis='index')

Weight    221.522976
Age        26.938731
dtype: float64

In [10]:
nba.sum(axis=1)  # mean on the rows, same as axis='columns'

0      7730542.0
1      6796476.0
2          262.0
3      1148875.0
4      5000268.0
         ...    
453    2433570.0
454     900228.0
455    2900303.0
456     947557.0
457          0.0
Length: 458, dtype: float64

In [11]:
nba.count()

Name        457
Team        457
Number      457
Position    457
Age         457
Height      457
Weight      457
College     373
Salary      446
dtype: int64

In [12]:
len(pd.unique(nba['Team']))

31

In [13]:
nba['Age'].nunique()

22

## Filtering

In [14]:
a = nba.loc[nba['Salary'] > 5000000, ['Name', 'Salary']]  # saving a new data frame. It has the same index as before
print(a)
a['Salary'].max()  # calculaing max on the column salary

                 Name      Salary
0       Avery Bradley   7730337.0
1         Jae Crowder   6796117.0
5        Amir Johnson  12000000.0
11      Isaiah Thomas   6912869.0
19       Jarrett Jack   6300000.0
..                ...         ...
433  Gerald Henderson   6000000.0
434       Chris Kaman   5016000.0
444        Alec Burks   9463484.0
446    Derrick Favors  12000000.0
448    Gordon Hayward  15409570.0

[144 rows x 2 columns]


25000000.0

In [15]:
nba['Salary'] > 5000000  #filtering the column salary (returns a new DF of boolean values)

0       True
1       True
2      False
3      False
4      False
       ...  
453    False
454    False
455    False
456    False
457    False
Name: Salary, Length: 458, dtype: bool

In [16]:
a['Salary'].iloc[:10]

0      7730337.0
1      6796117.0
5     12000000.0
11     6912869.0
19     6300000.0
23    19689000.0
29    11235955.0
30     8000000.0
33    22875000.0
34     7402812.0
Name: Salary, dtype: float64

In [17]:
nba.loc[(nba['Salary'] < 2000000) & (nba['Age'] < 25), ['Name', 'Salary']]


Unnamed: 0,Name,Salary
3,R.J. Hunter,1148640.0
6,Jordan Mickey,1170960.0
8,Terry Rozier,1824360.0
13,James Young,1749840.0
16,Markel Brown,845059.0
...,...,...
430,Allen Crabbe,947276.0
438,Luis Montero,525093.0
447,Rudy Gobert,1175880.0
449,Rodney Hood,1348440.0


In [18]:
nba.query('Salary < 2000000 & Age <25')[['Name', 'Salary']]

Unnamed: 0,Name,Salary
3,R.J. Hunter,1148640.0
6,Jordan Mickey,1170960.0
8,Terry Rozier,1824360.0
13,James Young,1749840.0
16,Markel Brown,845059.0
...,...,...
430,Allen Crabbe,947276.0
438,Luis Montero,525093.0
447,Rudy Gobert,1175880.0
449,Rodney Hood,1348440.0


## Adding and manipulating columns

In [19]:
nba["Sport"] = "Basketball"  # all rows of columns Sport receive the same value Basketball

In [20]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Basketball
...,...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0,Basketball
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0,Basketball
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0,Basketball
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0,Basketball


In [21]:
nba.insert(3, column="League", value="NBA") # Another way, and the column doesn't have to be placed last

In [22]:
nba["Weight"].mul(0.453592)

0       81.646560
1      106.594120
2       92.986360
3       83.914520
4      104.779752
          ...    
453     92.079176
454     81.192968
455    116.119552
456    104.779752
457           NaN
Name: Weight, Length: 458, dtype: float64

In [23]:
nba["Weight_kg"] = nba["Weight"] * 0.453592 # adding a new column + manipulating a column

What if we want to enter different values to rows?

In [24]:
mil = nba["Salary"].div(1000000)
nba.insert(9, column="Salary_M", value=mil) # at index 9 add a new column with the value in mil

In [25]:
nba['rookie_deal'] = True
nba['rookie_deal'].where(nba['Age'] <= 22, False, inplace=True)  # Replace values where the condition is False.
nba.head()

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary_M,Salary,Sport,Weight_kg,rookie_deal
0,Avery Bradley,Boston Celtics,0.0,NBA,PG,25.0,6-2,180.0,Texas,7.730337,7730337.0,Basketball,81.64656,False
1,Jae Crowder,Boston Celtics,99.0,NBA,SF,25.0,6-6,235.0,Marquette,6.796117,6796117.0,Basketball,106.59412,False
2,John Holland,Boston Celtics,30.0,NBA,SG,27.0,6-5,205.0,Boston University,,,Basketball,92.98636,False
3,R.J. Hunter,Boston Celtics,28.0,NBA,SG,22.0,6-5,185.0,Georgia State,1.14864,1148640.0,Basketball,83.91452,True
4,Jonas Jerebko,Boston Celtics,8.0,NBA,PF,29.0,6-10,231.0,,5.0,5000000.0,Basketball,104.779752,False


## Dealing with missing values

In [26]:
nba.isna()  # checks which values are missing

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary_M,Salary,Sport,Weight_kg,rookie_deal
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,True,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,False,False,False,False,False,False,False,False,False,False,False,False,False,False
454,False,False,False,False,False,False,False,False,True,False,False,False,False,False
455,False,False,False,False,False,False,False,False,True,False,False,False,False,False
456,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [27]:
nba.dropna().head(3)

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary_M,Salary,Sport,Weight_kg,rookie_deal
0,Avery Bradley,Boston Celtics,0.0,NBA,PG,25.0,6-2,180.0,Texas,7.730337,7730337.0,Basketball,81.64656,False
1,Jae Crowder,Boston Celtics,99.0,NBA,SF,25.0,6-6,235.0,Marquette,6.796117,6796117.0,Basketball,106.59412,False
3,R.J. Hunter,Boston Celtics,28.0,NBA,SG,22.0,6-5,185.0,Georgia State,1.14864,1148640.0,Basketball,83.91452,True


In [28]:
nba.dropna(how="all").head(3) # droping rows if all values of the row is null values like the last row (457)
# thats why John Holland is stil in the data frame

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary_M,Salary,Sport,Weight_kg,rookie_deal
0,Avery Bradley,Boston Celtics,0.0,NBA,PG,25.0,6-2,180.0,Texas,7.730337,7730337.0,Basketball,81.64656,False
1,Jae Crowder,Boston Celtics,99.0,NBA,SF,25.0,6-6,235.0,Marquette,6.796117,6796117.0,Basketball,106.59412,False
2,John Holland,Boston Celtics,30.0,NBA,SG,27.0,6-5,205.0,Boston University,,,Basketball,92.98636,False


In [29]:
nba.dropna(subset=['Salary']).head(3) # dropif any of the collumns in subset are missing

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary_M,Salary,Sport,Weight_kg,rookie_deal
0,Avery Bradley,Boston Celtics,0.0,NBA,PG,25.0,6-2,180.0,Texas,7.730337,7730337.0,Basketball,81.64656,False
1,Jae Crowder,Boston Celtics,99.0,NBA,SF,25.0,6-6,235.0,Marquette,6.796117,6796117.0,Basketball,106.59412,False
3,R.J. Hunter,Boston Celtics,28.0,NBA,SG,22.0,6-5,185.0,Georgia State,1.14864,1148640.0,Basketball,83.91452,True


In [30]:
nba.drop(columns = ['Sport', 'rookie_deal', 'League'], inplace=True)  # drop these columns from the data base

In [31]:
nba.dropna(how="all", inplace=True) 

nba["College"].fillna(value="None", inplace=True)
nba["Salary"].fillna(value=nba["Salary"].mean(), inplace=True)
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2.2398,2239800.0,106.140528
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2.433333,2433333.0,92.079176
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,0.9,900000.0,81.192968
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2.9,2900000.0,116.119552
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,0.947276,947276.0,104.779752


In [32]:
nba['Number'] = nba['Number'].astype('int')  # converting data type
# pd.to_datetime()

In [33]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 11 columns):
Name         457 non-null object
Team         457 non-null object
Number       457 non-null int64
Position     457 non-null object
Age          457 non-null float64
Height       457 non-null object
Weight       457 non-null float64
College      457 non-null object
Salary_M     446 non-null float64
Salary       457 non-null float64
Weight_kg    457 non-null float64
dtypes: float64(5), int64(1), object(5)
memory usage: 42.8+ KB


## Sorting

In [34]:
nba.Name.values

array(['Avery Bradley', 'Jae Crowder', 'John Holland', 'R.J. Hunter',
       'Jonas Jerebko', 'Amir Johnson', 'Jordan Mickey', 'Kelly Olynyk',
       'Terry Rozier', 'Marcus Smart', 'Jared Sullinger', 'Isaiah Thomas',
       'Evan Turner', 'James Young', 'Tyler Zeller', 'Bojan Bogdanovic',
       'Markel Brown', 'Wayne Ellington', 'Rondae Hollis-Jefferson',
       'Jarrett Jack', 'Sergey Karasev', 'Sean Kilpatrick',
       'Shane Larkin', 'Brook Lopez', 'Chris McCullough', 'Willie Reed',
       'Thomas Robinson', 'Henry Sims', 'Donald Sloan', 'Thaddeus Young',
       'Arron Afflalo', 'Lou Amundson', 'Thanasis Antetokounmpo',
       'Carmelo Anthony', 'Jose Calderon', 'Cleanthony Early',
       'Langston Galloway', 'Jerian Grant', 'Robin Lopez', "Kyle O'Quinn",
       'Kristaps Porzingis', 'Kevin Seraphin', 'Lance Thomas',
       'Sasha Vujacic', 'Derrick Williams', 'Tony Wroten', 'Elton Brand',
       'Isaiah Canaan', 'Robert Covington', 'Joel Embiid', 'Jerami Grant',
       'Richaun H

In [35]:
nba.sort_values("Name").head() # you need to specify which colume you want to sort by

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
152,Aaron Brooks,Chicago Bulls,0,PG,31.0,6-0,161.0,Oregon,2.25,2250000.0,73.028312
356,Aaron Gordon,Orlando Magic,0,PF,20.0,6-9,220.0,Arizona,4.17168,4171680.0,99.79024
328,Aaron Harrison,Charlotte Hornets,9,SG,21.0,6-6,210.0,Kentucky,0.525093,525093.0,95.25432
404,Adreian Payne,Minnesota Timberwolves,33,PF,25.0,6-10,237.0,Michigan State,1.93884,1938840.0,107.501304
312,Al Horford,Atlanta Hawks,15,C,30.0,6-10,245.0,Florida,12.0,12000000.0,111.13004


In [36]:
nba.sort_values("Salary", ascending=False).head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
109,Kobe Bryant,Los Angeles Lakers,24,SF,37.0,6-6,212.0,,25.0,25000000.0,96.161504
169,LeBron James,Cleveland Cavaliers,23,SF,31.0,6-8,250.0,,22.9705,22970500.0,113.398
33,Carmelo Anthony,New York Knicks,7,SF,32.0,6-8,240.0,Syracuse,22.875,22875000.0,108.86208
251,Dwight Howard,Houston Rockets,12,C,30.0,6-11,265.0,,22.359364,22359364.0,120.20188
339,Chris Bosh,Miami Heat,1,PF,32.0,6-11,235.0,Georgia Tech,22.19273,22192730.0,106.59412


In [37]:
nba.sort_values(["Team", "Name"]).head() # first sort the team and then the name in each team

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
312,Al Horford,Atlanta Hawks,15,C,30.0,6-10,245.0,Florida,12.0,12000000.0,111.13004
318,Dennis Schroder,Atlanta Hawks,17,PG,22.0,6-1,172.0,,1.7634,1763400.0,78.017824
323,Jeff Teague,Atlanta Hawks,0,PG,27.0,6-2,186.0,Wake Forest,8.0,8000000.0,84.368112
309,Kent Bazemore,Atlanta Hawks,24,SF,26.0,6-5,201.0,Old Dominion,2.0,2000000.0,91.171992
311,Kirk Hinrich,Atlanta Hawks,12,SG,35.0,6-4,190.0,Kansas,2.85494,2854940.0,86.18248


In [38]:
nba.sort_values(["Team", "Name"], ascending=[True, False]).head()# if we want the name to be descending and team ascending \
# and of course you can sort as many columns as you want

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
322,Walter Tavares,Atlanta Hawks,22,C,24.0,7-3,260.0,,1.0,1000000.0,117.93392
310,Tim Hardaway Jr.,Atlanta Hawks,10,SG,24.0,6-6,205.0,Michigan,1.30452,1304520.0,92.98636
321,Tiago Splitter,Atlanta Hawks,11,C,31.0,6-11,245.0,,9.75625,9756250.0,111.13004
320,Thabo Sefolosha,Atlanta Hawks,25,SF,32.0,6-7,220.0,,4.0,4000000.0,99.79024
315,Paul Millsap,Atlanta Hawks,4,PF,31.0,6-8,246.0,Louisiana Tech,18.671659,18671659.0,111.583632


In [39]:
nba.nlargest(n=5, columns="Salary") # more efficient

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
109,Kobe Bryant,Los Angeles Lakers,24,SF,37.0,6-6,212.0,,25.0,25000000.0,96.161504
169,LeBron James,Cleveland Cavaliers,23,SF,31.0,6-8,250.0,,22.9705,22970500.0,113.398
33,Carmelo Anthony,New York Knicks,7,SF,32.0,6-8,240.0,Syracuse,22.875,22875000.0,108.86208
251,Dwight Howard,Houston Rockets,12,C,30.0,6-11,265.0,,22.359364,22359364.0,120.20188
339,Chris Bosh,Miami Heat,1,PF,32.0,6-11,235.0,Georgia Tech,22.19273,22192730.0,106.59412


## Renaming

In [40]:
nba.rename({col: col.lower().replace(" ", '_') for col in nba.columns})
nba.rename(columns={"number": "player_number", "name": "player_name"}, inplace=False)


Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg
0,Avery Bradley,Boston Celtics,0,PG,25.0,6-2,180.0,Texas,7.730337,7.730337e+06,81.646560
1,Jae Crowder,Boston Celtics,99,SF,25.0,6-6,235.0,Marquette,6.796117,6.796117e+06,106.594120
2,John Holland,Boston Celtics,30,SG,27.0,6-5,205.0,Boston University,,4.842684e+06,92.986360
3,R.J. Hunter,Boston Celtics,28,SG,22.0,6-5,185.0,Georgia State,1.148640,1.148640e+06,83.914520
4,Jonas Jerebko,Boston Celtics,8,PF,29.0,6-10,231.0,,5.000000,5.000000e+06,104.779752
...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41,PF,20.0,6-10,234.0,Kentucky,2.239800,2.239800e+06,106.140528
453,Shelvin Mack,Utah Jazz,8,PG,26.0,6-3,203.0,Butler,2.433333,2.433333e+06,92.079176
454,Raul Neto,Utah Jazz,25,PG,24.0,6-1,179.0,,0.900000,9.000000e+05,81.192968
455,Tibor Pleiss,Utah Jazz,21,C,26.0,7-3,256.0,,2.900000,2.900000e+06,116.119552


## Grouping

In [41]:
nba[['Team', 'Salary']].groupby('Team').max()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
Atlanta Hawks,18671659.0
Boston Celtics,12000000.0
Brooklyn Nets,19689000.0
Charlotte Hornets,13500000.0
Chicago Bulls,20093064.0
Cleveland Cavaliers,22970500.0
Dallas Mavericks,16407500.0
Denver Nuggets,14000000.0
Detroit Pistons,16000000.0
Golden State Warriors,15501000.0


In [42]:
nba.groupby('Team')['Salary_M'].mean().sort_values(ascending=False)

Team
Cleveland Cavaliers       7.642049
Miami Heat                6.347359
Los Angeles Clippers      6.323643
Oklahoma City Thunder     6.251020
Golden State Warriors     5.924600
Chicago Bulls             5.785559
San Antonio Spurs         5.629516
Memphis Grizzlies         5.467920
Charlotte Hornets         5.222728
Washington Wizards        5.088576
Houston Rockets           5.018868
Atlanta Hawks             4.860197
Los Angeles Lakers        4.784695
Sacramento Kings          4.778911
Dallas Mavericks          4.746582
Toronto Raptors           4.741174
Minnesota Timberwolves    4.593054
New York Knicks           4.581494
Detroit Pistons           4.477884
Indiana Pacers            4.450122
New Orleans Pelicans      4.355304
Milwaukee Bucks           4.350220
Orlando Magic             4.297248
Denver Nuggets            4.294424
Phoenix Suns              4.229676
Utah Jazz                 4.204006
Boston Celtics            4.181505
Brooklyn Nets             3.501898
Portland Trail 

In [43]:
position = nba.groupby(['Position'])['Weight'].agg(['mean', 'max', 'min', 'count'])

In [44]:
position.index

Index(['C', 'PF', 'PG', 'SF', 'SG'], dtype='object', name='Position')

In [45]:
position.loc['C']

mean     254.205128
max      307.000000
min      220.000000
count     78.000000
Name: C, dtype: float64

## Using flunctions (Apply)

In [46]:
def find_city(team):
    if team  == 'Portland Trail Blazers':
        return 'Portland'
    return ' '.join(team.split()[:-1])

In [47]:
nba['Team'].str.split().apply(lambda x: ' '.join(x[:-1]))

0      Boston
1      Boston
2      Boston
3      Boston
4      Boston
        ...  
452      Utah
453      Utah
454      Utah
455      Utah
456      Utah
Name: Team, Length: 457, dtype: object

In [48]:
nba['City'] = nba['Team'].apply(find_city)

In [49]:
nba.apply('mean')

Number       1.767834e+01
Age          2.693873e+01
Weight       2.215230e+02
Salary_M     4.842684e+00
Salary       4.842684e+06
Weight_kg    1.004810e+02
dtype: float64

In [50]:
nba[['Age', 'Weight', 'Salary']].apply(lambda x: np.mean(np.sqrt(x)), axis=0)

Age          5.173461
Weight      14.857244
Salary    1924.563271
dtype: float64

In [51]:
def feet_to_cm(height):
    height = [int(x) for x in height.split('-')]
    inches = height[0]*12 + height[1]
    return inches*2.45

In [52]:
nba['height_cm'] = nba['Height'].apply(feet_to_cm)

In [53]:
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary_M,Salary,Weight_kg,City,height_cm
0,Avery Bradley,Boston Celtics,0,PG,25.0,6-2,180.0,Texas,7.730337,7.730337e+06,81.646560,Boston,181.30
1,Jae Crowder,Boston Celtics,99,SF,25.0,6-6,235.0,Marquette,6.796117,6.796117e+06,106.594120,Boston,191.10
2,John Holland,Boston Celtics,30,SG,27.0,6-5,205.0,Boston University,,4.842684e+06,92.986360,Boston,188.65
3,R.J. Hunter,Boston Celtics,28,SG,22.0,6-5,185.0,Georgia State,1.148640,1.148640e+06,83.914520,Boston,188.65
4,Jonas Jerebko,Boston Celtics,8,PF,29.0,6-10,231.0,,5.000000,5.000000e+06,104.779752,Boston,200.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41,PF,20.0,6-10,234.0,Kentucky,2.239800,2.239800e+06,106.140528,Utah,200.90
453,Shelvin Mack,Utah Jazz,8,PG,26.0,6-3,203.0,Butler,2.433333,2.433333e+06,92.079176,Utah,183.75
454,Raul Neto,Utah Jazz,25,PG,24.0,6-1,179.0,,0.900000,9.000000e+05,81.192968,Utah,178.85
455,Tibor Pleiss,Utah Jazz,21,C,26.0,7-3,256.0,,2.900000,2.900000e+06,116.119552,Utah,213.15


## Writing to file


In [54]:
nba.to_csv('nba_new.csv') # writing to file (the default is to write with an index)