In [1]:
import pandas as pd

In [2]:
groceries = pd.Series(data=[30,6,'Yes','No'], index=['eggs', 'apples', 'milk', 'bread'])

In [3]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [4]:
groceries.shape

(4,)

In [5]:
groceries.ndim

1

In [6]:
groceries.size

4

In [7]:
groceries.index

Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')

In [8]:
groceries.values

array([30, 6, 'Yes', 'No'], dtype=object)

In [9]:
'banana' in groceries

False

In [10]:
'bread' in groceries

True

In [11]:
groceries['eggs']

30

In [12]:
groceries[['eggs','apples']]

eggs      30
apples     6
dtype: object

In [13]:
groceries[0]

30

In [14]:
groceries[-1]

'No'

In [15]:
groceries[[0,1]]

eggs      30
apples     6
dtype: object

In [16]:
groceries['eggs'] = 2

In [17]:
groceries.drop('apples')

eggs       2
milk     Yes
bread     No
dtype: object

In [18]:
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [19]:
groceries.drop('apples', inplace=True)

In [20]:
groceries

eggs       2
milk     Yes
bread     No
dtype: object

## Arithmetic Operations

In [21]:
fruits = pd.Series([10, 6, 3], ['apples', 'oranges', 'bananas'])
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [22]:
fruits * 2

apples     20
oranges    12
bananas     6
dtype: int64

In [23]:
fruits - 1

apples     9
oranges    5
bananas    2
dtype: int64

In [24]:
fruits *5

apples     50
oranges    30
bananas    15
dtype: int64

In [25]:
import numpy as np

In [26]:
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [27]:
np.power(fruits, 2)

apples     100
oranges     36
bananas      9
dtype: int64

In [28]:
fruits['bananas'] + 2

5

In [29]:
fruits.iloc[0] = 2

In [30]:
fruits

apples     2
oranges    6
bananas    3
dtype: int64

In [31]:
fruits.loc[['apples','bananas']] / 2 

apples     1.0
bananas    1.5
dtype: float64

In [32]:
import pandas as pd

distance_from_sun = [149.6, 1433.5, 227.9, 108.2, 778.6]

planets = ['Earth', 'Saturn', 'Mars', 'Venus', 'Jupiter']

dist_planets = pd.Series(distance_from_sun, planets)

In [33]:
time_lights = dist_planets / 18

In [34]:
close_planets = time_lights[time_lights < 40]

In [35]:
close_planets

Earth     8.311111
Mars     12.661111
Venus     6.011111
dtype: float64

In [36]:
items = {'Bob': pd.Series(data = [245,55,25], index = ['bike','pants','watch']),
         'Alice': pd.Series(data = [40,110,550,45], index = ['book', 'glasses', 'bike', 'pants'])}

In [37]:
type(items)

dict

In [38]:
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,550.0
book,,40.0
glasses,,110.0
pants,55.0,45.0
watch,25.0,


In [39]:
items = {'Bob': pd.Series(data = [245,55,25]),
         'Alice': pd.Series(data = [40,110,550,45])}

In [40]:
shopping_carts = pd.DataFrame(items)

In [41]:
shopping_carts

Unnamed: 0,Bob,Alice
0,245.0,40
1,55.0,110
2,25.0,550
3,,45


In [42]:
shopping_carts.index

RangeIndex(start=0, stop=4, step=1)

In [43]:
shopping_carts.columns

Index(['Bob', 'Alice'], dtype='object')

In [44]:
shopping_carts.values

array([[245.,  40.],
       [ 55., 110.],
       [ 25., 550.],
       [ nan,  45.]])

In [45]:
shopping_carts.shape

(4, 2)

In [46]:
shopping_carts.ndim

2

In [47]:
shopping_carts.size

8

In [48]:
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
0,245
1,55
2,25


In [49]:
sel_shopping_cart = pd.DataFrame(items, index=['pants', 'book'], columns=['Alice'])
sel_shopping_cart

Unnamed: 0,Alice
pants,
book,


In [50]:
data = {'Integers': [1,2,3,4,5,6], 'Floats': [4.5, 3.6, 2.0, 9.8, 1.1, 10.0]}
data_df = pd.DataFrame(data)
data_df

Unnamed: 0,Integers,Floats
0,1,4.5
1,2,3.6
2,3,2.0
3,4,9.8
4,5,1.1
5,6,10.0


In [51]:
data_df.index = ['label1','label2','label3','label4','label5','label6']

In [52]:
data_df

Unnamed: 0,Integers,Floats
label1,1,4.5
label2,2,3.6
label3,3,2.0
label4,4,9.8
label5,5,1.1
label6,6,10.0


In [53]:
data_df[['Integers']]

Unnamed: 0,Integers
label1,1
label2,2
label3,3
label4,4
label5,5
label6,6


In [54]:
data_df.loc[['label1']]

Unnamed: 0,Integers,Floats
label1,1,4.5


In [55]:
data_df['Floats']['label1']

4.5

In [56]:
data_df['Unknown'] = [12, 23.4, 23, 2, 45,33]

In [57]:
data_df

Unnamed: 0,Integers,Floats,Unknown
label1,1,4.5,12.0
label2,2,3.6,23.4
label3,3,2.0,23.0
label4,4,9.8,2.0
label5,5,1.1,45.0
label6,6,10.0,33.0


In [58]:
data_df['Unknown2'] = data_df['Floats'] + data_df['Unknown']

In [59]:
data_df

Unnamed: 0,Integers,Floats,Unknown,Unknown2
label1,1,4.5,12.0,16.5
label2,2,3.6,23.4,27.0
label3,3,2.0,23.0,25.0
label4,4,9.8,2.0,11.8
label5,5,1.1,45.0,46.1
label6,6,10.0,33.0,43.0


In [60]:
newitems = [{'Floats': 20, 'pants': 30, 'watches': 33, 'glasses': 4}]
new_label = pd.DataFrame(newitems, index=['label7'])
new_label

Unnamed: 0,Floats,pants,watches,glasses
label7,20,30,33,4


In [61]:
data_df.append(new_label)

Unnamed: 0,Integers,Floats,Unknown,Unknown2,pants,watches,glasses
label1,1.0,4.5,12.0,16.5,,,
label2,2.0,3.6,23.4,27.0,,,
label3,3.0,2.0,23.0,25.0,,,
label4,4.0,9.8,2.0,11.8,,,
label5,5.0,1.1,45.0,46.1,,,
label6,6.0,10.0,33.0,43.0,,,
label7,,20.0,,,30.0,33.0,4.0


In [62]:
data_df

Unnamed: 0,Integers,Floats,Unknown,Unknown2
label1,1,4.5,12.0,16.5
label2,2,3.6,23.4,27.0
label3,3,2.0,23.0,25.0
label4,4,9.8,2.0,11.8
label5,5,1.1,45.0,46.1
label6,6,10.0,33.0,43.0


In [63]:
data_df['new_watches'] = data_df['Integers'][1:]

In [64]:
data_df.shape

(6, 5)

In [65]:
data_df.insert(2, 'DataType', [23,22,1,23,45,6])
data_df

Unnamed: 0,Integers,Floats,DataType,Unknown,Unknown2,new_watches
label1,1,4.5,23,12.0,16.5,
label2,2,3.6,22,23.4,27.0,2.0
label3,3,2.0,1,23.0,25.0,3.0
label4,4,9.8,23,2.0,11.8,4.0
label5,5,1.1,45,45.0,46.1,5.0
label6,6,10.0,6,33.0,43.0,6.0


In [66]:
data_df.pop('new_watches')
data_df

Unnamed: 0,Integers,Floats,DataType,Unknown,Unknown2
label1,1,4.5,23,12.0,16.5
label2,2,3.6,22,23.4,27.0
label3,3,2.0,1,23.0,25.0
label4,4,9.8,23,2.0,11.8
label5,5,1.1,45,45.0,46.1
label6,6,10.0,6,33.0,43.0


In [68]:
data_df = data_df.drop(['Unknown', 'Unknown2'], axis=1)
data_df

Unnamed: 0,Integers,Floats,DataType
label1,1,4.5,23
label2,2,3.6,22
label3,3,2.0,1
label4,4,9.8,23
label5,5,1.1,45
label6,6,10.0,6


In [70]:
data_df = data_df.rename(columns = {'DataType':'Data'})

In [71]:
data_df

Unnamed: 0,Integers,Floats,Data
label1,1,4.5,23
label2,2,3.6,22
label3,3,2.0,1
label4,4,9.8,23
label5,5,1.1,45
label6,6,10.0,6


In [72]:
data_df = data_df.rename(index = {'label1':'label01'})

In [73]:
data_df

Unnamed: 0,Integers,Floats,Data
label01,1,4.5,23
label2,2,3.6,22
label3,3,2.0,1
label4,4,9.8,23
label5,5,1.1,45
label6,6,10.0,6


In [74]:
data_df = data_df.set_index('Data')
data_df

Unnamed: 0_level_0,Integers,Floats
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
23,1,4.5
22,2,3.6
1,3,2.0
23,4,9.8
45,5,1.1
6,6,10.0


In [78]:
data_df = data_df.reset_index()

In [80]:
data_df

Unnamed: 0,Data,Integers,Floats
0,23,1,4.5
1,22,2,3.6
2,1,3,2.0
3,23,4,9.8
4,45,5,1.1
5,6,6,10.0


In [82]:
data_df['Data'][1]

'NaN'

In [83]:
data_Df

NameError: name 'data_Df' is not defined

In [89]:
items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes':8, 'suits':45},
{'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5, 'shirts': 2, 'shoes':5, 'suits':7},
{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes':10}]

In [90]:
store_items = pd.DataFrame(items, index = ['store 1', 'store 2', 'store 3'])
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


In [95]:
x = store_items.isnull().sum().sum()

In [96]:
x

3

In [98]:
store_items.dropna(axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 2,15,5,10,2.0,5,7.0,50.0


In [99]:
store_items.dropna(axis=1)

Unnamed: 0,bikes,pants,watches,shoes
store 1,20,30,35,8
store 2,15,5,10,5
store 3,20,30,35,10


In [100]:
store_items.fillna(0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,0.0
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,0.0,10,0.0,4.0


In [101]:
store_items.fillna(method='ffill', axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


In [102]:
store_items.fillna(method='ffill', axis=1)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,35.0,10.0,10.0,4.0


In [105]:
store_items.interpolate(method='linear', axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


In [106]:
import pandas as pd
import numpy as np

pd.set_option('precision', 2)

In [109]:
books = pd.Series(data = ['Great Expectations', 'Of Mice and Men', 'Romeo and Juliet', 'The Time Machine', 'Alice in Wonderland' ])
authors = pd.Series(data = ['Charles Dickens', 'John Steinbeck', 'William Shakespeare', ' H. G. Wells', 'Lewis Carroll' ])

In [110]:
user_1 = pd.Series(data = [3.2, np.nan, 2.5])
user_2 = pd.Series(data = [5., 1.3, 4.0, 3.8])
user_3 = pd.Series(data = [2.0, 2.3, np.nan, 4])
user_4 = pd.Series(data = [4, 3.5, 4, 5, 4.2])

In [111]:
dat = {'Book Title': books,
       'Author': authors,
       'User 1': user_1,
       'User 2': user_2,
       'User 3': user_3,
       'User 4': user_4
      }

In [112]:
book_ratings = pd.DataFrame(dat)

In [113]:
book_ratings.fillna(book_ratings.mean(), inplace=True)

In [114]:
book_ratings

Unnamed: 0,Book Title,Author,User 1,User 2,User 3,User 4
0,Great Expectations,Charles Dickens,3.2,5.0,2.0,4.0
1,Of Mice and Men,John Steinbeck,2.85,1.3,2.3,3.5
2,Romeo and Juliet,William Shakespeare,2.5,4.0,2.77,4.0
3,The Time Machine,H. G. Wells,2.85,3.8,4.0,5.0
4,Alice in Wonderland,Lewis Carroll,2.85,3.53,2.77,4.2


In [123]:
import pandas as pd

In [124]:
df = pd.read_csv('Datasets/fake_company.csv')

df.head()

Unnamed: 0,Year,Name,Department,Age,Salary
0,1990,Alice,HR,25,50000
1,1990,Bob,RD,30,48000
2,1990,Charlie,Admin,45,55000
3,1991,Alice,HR,26,52000
4,1991,Bob,RD,31,50000


In [125]:
df.groupby(['Year'])['Salary'].sum()

Year
1990    153000
1991    162000
1992    174000
Name: Salary, dtype: int64

In [126]:
df.groupby(['Year', 'Department'])['Salary'].sum()

Year  Department
1990  Admin         55000
      HR            50000
      RD            48000
1991  Admin         60000
      HR            52000
      RD            50000
1992  Admin         62000
      HR            60000
      RD            52000
Name: Salary, dtype: int64

In [127]:
df.groupby(['Name'])['Salary'].sum()

Name
Alice      162000
Bob        150000
Charlie    177000
Name: Salary, dtype: int64