# 101 Pandas Exercises
https://www.machinelearningplus.com/python/101-pandas-exercises-python/

In [2]:
import pandas as pd
import numpy as np

## How to import pandas and check the version

In [4]:
# List every attribute name exposed by the pandas namespace (public classes as
# well as private/dunder names); handy for discovery, but the output is long.
dir(pd)

['BooleanDtype',
 'Categorical',
 'CategoricalDtype',
 'CategoricalIndex',
 'DataFrame',
 'DateOffset',
 'DatetimeIndex',
 'DatetimeTZDtype',
 'ExcelFile',
 'ExcelWriter',
 'Flags',
 'Float32Dtype',
 'Float64Dtype',
 'Float64Index',
 'Grouper',
 'HDFStore',
 'Index',
 'IndexSlice',
 'Int16Dtype',
 'Int32Dtype',
 'Int64Dtype',
 'Int64Index',
 'Int8Dtype',
 'Interval',
 'IntervalDtype',
 'IntervalIndex',
 'MultiIndex',
 'NA',
 'NaT',
 'NamedAgg',
 'Period',
 'PeriodDtype',
 'PeriodIndex',
 'RangeIndex',
 'Series',
 'SparseDtype',
 'StringDtype',
 'Timedelta',
 'TimedeltaIndex',
 'Timestamp',
 'UInt16Dtype',
 'UInt32Dtype',
 'UInt64Dtype',
 'UInt64Index',
 'UInt8Dtype',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__docformat__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_config',
 '_hashtable',
 '_is_numpy_dev',
 '_lib',
 '_libs',
 '_np_version_under1p18',
 '_testing',
 '_tslib',
 '_typing',
 

In [5]:
# Report the installed pandas version string.
print(pd.__version__)
# show_versions() writes the environment report (here as JSON) to stdout itself
# and returns None, so it is called bare: wrapping it in print() would append a
# stray "None" line after the report.
pd.show_versions(as_json=True)

1.3.5
{
  "system": {
    "commit": "66e3805b8cabe977f40c05259cc3fcf7ead5687d",
    "python": "3.7.1.final.0",
    "python-bits": 64,
    "OS": "Windows",
    "OS-release": "10",
    "Version": "10.0.19041",
    "machine": "AMD64",
    "processor": "AMD64 Family 23 Model 24 Stepping 1, AuthenticAMD",
    "byteorder": "little",
    "LC_ALL": null,
    "LANG": null,
    "LOCALE": {
      "language-code": null,
      "encoding": null
    }
  },
  "dependencies": {
    "pandas": "1.3.5",
    "numpy": "1.21.2",
    "pytz": "2021.3",
    "dateutil": "2.8.2",
    "pip": "21.2.4",
    "setuptools": "58.0.4",
    "Cython": null,
    "pytest": null,
    "hypothesis": null,
    "sphinx": "1.8.5",
    "blosc": null,
    "feather": null,
    "xlsxwriter": "3.0.2",
    "lxml.etree": null,
    "html5lib": null,
    "pymysql": null,
    "psycopg2": null,
    "jinja2": "2.11.3",
    "IPython": "7.29.0",
    "pandas_datareader": null,
    "bs4": null,
    "bottleneck": "1.3.2",
    "fsspec": "2021.10.1"

## How to create a series from a list, numpy array and dict?

In [3]:
# Three source containers for building a Series: a list of letters, a NumPy
# integer range, and a dict pairing each letter with its position in `mylist`.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {letter: position for letter, position in zip(mylist, myarr)}

In [9]:
# Inspect the mapping; keys keep the insertion order of `mylist`, which is why
# 'e' (3) is listed before 'd' (4).
mydict

{'a': 0,
 'b': 1,
 'c': 2,
 'e': 3,
 'd': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25}

In [8]:
# Build a Series from each container and preview the first two rows of each.
print(
    pd.Series(mylist, index=myarr).head(2),  # list, indexed by the integer array
    pd.Series(myarr).head(2),                # NumPy array, default integer index
    pd.Series(mydict).head(2),               # dict, keys become the index
    sep='\n',
)

0    a
1    b
dtype: object
0    0
1    1
dtype: int32
a    0
b    1
dtype: int32


## How to convert the index of a series into a column of a dataframe

In [10]:
# Rebuild the letter -> position mapping and wrap it in a Series whose index
# holds the letters and whose values hold the positions.
mylist = [*'abcedfghijklmnopqrstuvwxyz']
myarr = np.arange(26)
mydict = {letter: position for letter, position in zip(mylist, myarr)}
ser = pd.Series(mydict)

In [19]:
# Approach 1: wrap the Series in a DataFrame, then move its index into a column.
(
    pd.DataFrame(ser)
    .reset_index()  # the letter index becomes a regular column named "index"
    .head(2)
)

Unnamed: 0,index,0
0,a,0
1,b,1


In [16]:
# Approach 2: same result via Series.to_frame() instead of the DataFrame constructor.
(
    ser
    .to_frame()
    .reset_index()  # the letter index becomes a regular column named "index"
    .head(2)
)

Unnamed: 0,index,0
0,a,0
1,b,1


## How to combine many series to form a dataframe

In [20]:
# Two equally long Series sharing the default integer index:
# letters in ser1, the integers 0..25 in ser2.
ser1 = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])
ser2 = pd.Series(np.arange(26))

In [24]:
# Approach 1: place the Series side by side; the resulting columns are labelled 0 and 1.
pd.concat([ser1, ser2], axis='columns').head(2)

Unnamed: 0,0,1
0,a,0
1,b,1


In [26]:
# Approach 2: build the frame from a dict so each Series gets an explicit column name.
pd.DataFrame(
    {
        'col1': ser1,
        'col2': ser2,
    }
).head(2)

Unnamed: 0,col1,col2
0,a,0
1,b,1


## How to assign a name to a series

In [27]:
# Unnamed Series of single letters with the default integer index.
ser = pd.Series([*'abcedfghijklmnopqrstuvwxyz'])

In [28]:
# Label the Series itself: this sets `Series.name` (reported as
# "Name: alphabets" in the repr), not the name of its index.
ser.name = 'alphabets'

In [30]:
# Preview the first two rows; the repr footer now reports the assigned name.
ser.head(2)

0    a
1    b
Name: alphabets, dtype: object

## How to get the items of series A not present in series B

In [31]:
# Two overlapping integer Series: the values 4 and 5 appear in both.
ser1, ser2 = (
    pd.Series(values)
    for values in ([1, 2, 3, 4, 5], [4, 5, 6, 7, 8])
)

In [46]:
# Approach 1: values of ser1 that do not occur in ser2, as a fresh Series.
# np.setdiff1d returns the sorted unique values, so the row order is
# deterministic (iterating a Python set gives no ordering guarantee) and the
# integer dtype is kept even when the difference is empty.
pd.Series(np.setdiff1d(ser1, ser2))

0    1
1    2
2    3
dtype: int64

In [44]:
# Approach 2: keep the rows of ser1 whose value is absent from ser2;
# the surviving rows retain their original index labels.
ser1.loc[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

## How to get the items not common to both series A and series B

In [47]:
# Same overlapping pair as before: 1..5 and 4..8 share the values 4 and 5.
ser1, ser2 = map(pd.Series, ([1, 2, 3, 4, 5], [4, 5, 6, 7, 8]))

In [48]:
# Caution: `-` is arithmetic subtraction paired by index label (1-4, 2-5, ...),
# so every row is -3. It is not a set operation and does not answer the question.
ser1 - ser2

0   -3
1   -3
2   -3
3   -3
4   -3
dtype: int64

In [52]:
# ser_u: values found in either Series; ser_i: values found in both.
ser_u, ser_i = (
    pd.Series(set_op(ser1, ser2))
    for set_op in (np.union1d, np.intersect1d)
)

In [51]:
# Union: the sorted unique values that occur in either Series.
ser_u

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64

In [54]:
# Intersection: the values that occur in both Series.
ser_i

0    4
1    5
dtype: int64

In [56]:
# Items not common to both: drop the intersection from the union. The surviving
# rows keep their index labels from ser_u, so the labels are not contiguous.
ser_u.loc[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64