Pandas
- Fast, flexible, and designed to work with tabular data.
- Designed to make working with structured data easier.

--- Robust tools for loading data from files such as CSV, Excel, etc.

--- Easy handling of missing values in the data (represented as NaN)

--- Size mutability: columns can be inserted into and deleted from a DataFrame

In [1]:
import numpy as np
import pandas as pd

In [None]:
# Display the version string of the imported pandas package.
pd.__version__

'2.1.4'

In [None]:
# Pandas DataFrame - a two-dimensional structure in which data is aligned in tabular form (rows and columns).

In [None]:
# First Series: a one-dimensional labelled array built from a NumPy array
# of letters. No index is passed, so pandas uses the default 0..3 labels.
data = np.array(list("abcd"))
s1 = pd.Series(data=data)
print(s1)

0    a
1    b
2    c
3    d
dtype: object


In [None]:
# A Series with explicit string labels: each key becomes an index label
# and each value the corresponding entry.
num_series = pd.Series({"a": 1, "b": 2, "c": 3, "d": 4})
print(num_series)

a    1
b    2
c    3
d    4
dtype: int64


In [None]:
# First pandas DataFrame: a 3x4 grid of random integers drawn from [5, 10),
# with columns labelled a-d and the default 0..2 row index.
two_d = np.random.randint(low=5, high=10, size=(3, 4))
first_df = pd.DataFrame(data=two_d, columns=list("abcd"))
print(first_df)

   a  b  c  d
0  6  9  6  5
1  5  5  6  8
2  5  7  6  5


In [None]:
# Same idea with custom row labels: a 3x4 grid of random integers drawn
# from [10, 35), columns a-d and rows A-C.
two_d = np.random.randint(low=10, high=35, size=(3, 4))
hello_df = pd.DataFrame(
    data=two_d,
    index=list("ABC"),
    columns=list("abcd"),
)
print(hello_df)

    a   b   c   d
A  13  14  33  15
B  34  26  32  23
C  28  12  19  29


In [None]:
# A named Series turned into a one-column DataFrame: the Series name
# ("Number") becomes the column label and its index the row labels.
num_series = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=list("abcdef"),
    name="Number",
)
df = pd.DataFrame(data=num_series)
print(df)

   Number
a       1
b       2
c       3
d       4
e       5
f       6


In [3]:
# Random integers from [10, 35) arranged as 3 rows (one per subject label)
# by 4 columns (a-d).
dilip_fav = np.random.randint(low=10, high=35, size=(3, 4))
dilip_df = pd.DataFrame(
    data=dilip_fav,
    index=["English", "Telugu", "Social"],
    columns=list("abcd"),
)
print(dilip_df)

          a   b   c   d
English  26  11  34  34
Telugu   34  15  12  20
Social   14  23  29  34


In [4]:
# prompt: create a dataframe with 2 columns and 10 rows

# numpy (np) and pandas (pd) are already imported in the notebook's first
# cell, so they are not re-imported here; keeping every import in that one
# cell makes the notebook's dependencies visible in a single place.
data = np.random.rand(10, 2)  # 10 rows x 2 columns of uniform samples in [0, 1)
df = pd.DataFrame(data, columns=['Column1', 'Column2'])
print(df)


    Column1   Column2
0  0.630017  0.381914
1  0.681989  0.153395
2  0.734926  0.897958
3  0.220032  0.300218
4  0.965603  0.784941
5  0.476778  0.287787
6  0.869150  0.906831
7  0.569745  0.547062
8  0.076127  0.822950
9  0.616582  0.499658


In [6]:
# DataFrame from a list of lists: every inner list is one row, the column
# names are supplied separately and the rows are labelled 1..3.
list_of_lists = [
    ["Dilip", 24],
    ["Vasu", 2],
    ["Neimisha", 27],
]
df = pd.DataFrame(
    data=list_of_lists,
    index=(1, 2, 3),
    columns=["Name", "Age"],
)
print(df)

       Name  Age
1     Dilip   24
2      Vasu    2
3  Neimisha   27


In [12]:
# DataFrame from a dict of equal-length lists: every key becomes a column
# and the rows get the default 0..3 index.
employee_dict = {
    "Employee Name": ["Dilip", "Vasu", "Neimisha", "Kishore"],
    "Age": [24, 2, 27, 31],
    "Income $": [65000, 55000, 60000, 85000],
}
df = pd.DataFrame(data=employee_dict)
print(df)

  Employee Name  Age  Income $
0         Dilip   24     65000
1          Vasu    2     55000
2      Neimisha   27     60000
3       Kishore   31     85000


In [13]:
# DataFrame from a dict of Series: each Series becomes one column, aligned
# on their shared default index 0..3.
first_series = pd.Series([1, 2, 3, 4])
second_series = pd.Series([5, 6, 7, 8])
series_dict = {
    "First Series": first_series,
    "Second Series": second_series,
}
df = pd.DataFrame(data=series_dict)
print(df)

   First Series  Second Series
0             1              5
1             2              6
2             3              7
3             4              8


In [15]:
# DataFrame from a list of dicts: each dict is one row and the union of the
# keys gives the columns. The first row has no "c" key, so that cell is
# filled with NaN.
list_of_dict = [
    {"a": 1, "b": 2},
    {"a": 5, "b": 10, "c": 20},
]
df = pd.DataFrame(data=list_of_dict, index=(1, 2))
print(df)

   a   b     c
1  1   2   NaN
2  5  10  20.0


In [21]:
# DataFrame from a list of tuples: the two lists are paired element-wise,
# each resulting tuple is one row, and the rows are labelled 1..3.
list_a = ["Guntur Kaaram", "Khaleja", "Athadu"]
list_b = ["1", "Businessman", "Pokiri"]
list_of_tuples = [(left, right) for left, right in zip(list_a, list_b)]
df = pd.DataFrame(
    data=list_of_tuples,
    index=(1, 2, 3),
    columns=["MBx3V", "MBxPuri"],
)
print(df)

           MBx3V      MBxPuri
1  Guntur Kaaram            1
2        Khaleja  Businessman
3         Athadu       Pokiri


In [26]:
# Create a DataFrame with 100 rows and 5 columns (a-e) filled with samples
# from np.random.randn; print() shows a truncated view of the long frame.
our_array = np.random.randn(100, 5)
df = pd.DataFrame(data=our_array, columns=list("abcde"))
print(df)

           a         b         c         d         e
0  -0.107680  0.469468  2.131453 -0.609228  1.270563
1   0.515775  1.666263  1.573866  0.467411 -0.148150
2  -0.713023 -0.521624 -0.349166 -0.583255  0.472533
3  -0.150903  1.045938 -1.400772 -1.610778  0.195928
4  -0.028928  1.924387 -0.186663  0.291858  1.020045
..       ...       ...       ...       ...       ...
95  0.880404  0.158345  0.654562  1.507769  2.514581
96 -0.220484 -0.754071  0.282912  0.512830 -0.766497
97  0.760020 -0.635551  0.573110  0.274322 -0.435953
98  0.472688  2.122444  0.351854  1.177349  0.801094
99  0.195648  0.355175  1.640748 -0.658826 -0.130411

[100 rows x 5 columns]


Unnamed: 0,a,b,c,d,e
0,0.128755,2.18057,1.459356,-0.485941,0.060003
1,-0.287279,1.167293,1.86195,-1.815995,0.969489
2,-0.365775,1.21094,-1.615533,0.19341,0.372486
3,0.519851,-0.435057,-1.335022,0.057082,-1.067158
4,-0.097838,-1.505847,1.353263,0.051439,0.732169
