In [2]:
import pandas as pd

In [4]:
import numpy as np

In [3]:
dict1 = {
    "name":['Tejas','Rohan','Sumit','Henry'],
    "marks":[99,95,90,60],
    "city":['Calicut','Bareilly','Kolkata','Bengaluru']
}

In [4]:
df = pd.DataFrame(dict1) # pd.DataFrame(dict1) creates a DataFrame from that dictionary using the pandas library

In [5]:
df

Unnamed: 0,name,marks,city
0,Tejas,99,Calicut
1,Rohan,95,Bareilly
2,Sumit,90,Kolkata
3,Henry,60,Bengaluru


**Each key in the dictionary becomes a column in the DataFrame, and each element in the lists becomes a row.**

In [6]:
#To export this dataframe into excel sheet we do:
df.to_csv('friends.csv')

In [7]:
# To remove index from friends.csv
df.to_csv('friends_index_false.csv', index=False)

In [8]:
#To fetch top 2 rows
df.head(2)

Unnamed: 0,name,marks,city
0,Tejas,99,Calicut
1,Rohan,95,Bareilly


In [9]:
#To fetch bottom 2 rows
df.tail(2)

Unnamed: 0,name,marks,city
2,Sumit,90,Kolkata
3,Henry,60,Bengaluru


`df.describe()` function is used to generate descriptive statistics of a DataFrame's numerical columns by default.

Here’s what it outputs (for numeric columns):

| Statistic | Meaning                               |
| --------- | ------------------------------------- |
| `count`   | Number of non-null (non-NaN) entries  |
| `mean`    | Average of the values                 |
| `std`     | Standard deviation                    |
| `min`     | Minimum value                         |
| `25%`     | 25th percentile (Q1 – lower quartile) |
| `50%`     | 50th percentile (median)              |
| `75%`     | 75th percentile (Q3 – upper quartile) |
| `max`     | Maximum value                         |


In [10]:
df.describe()

Unnamed: 0,marks
count,4.0
mean,86.0
std,17.720045
min,60.0
25%,82.5
50%,92.5
75%,96.0
max,99.0


In [11]:
sample = pd.read_csv('sample.csv')

In [12]:
sample

Unnamed: 0.1,Unnamed: 0,Name,Age,Department,Salary
0,0,Adam,28,HR,50000
1,1,Bob,35,Engineering,75000
2,2,Charlie,30,Marketing,62000
3,3,David,45,Engineering,88000
4,4,Eva,25,HR,48000


In [13]:
 #To fetch a particular column or element in that column
print(sample['Name'])
print("\n")
print(sample['Name'][0])

0       Adam
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object


Adam


In [14]:
#Change values
sample['Name'][0] = 'Adam'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  sample['Name'][0] = 'Adam'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Name'][0] = 'Adam'


In [15]:
sample

Unnamed: 0.1,Unnamed: 0,Name,Age,Department,Salary
0,0,Adam,28,HR,50000
1,1,Bob,35,Engineering,75000
2,2,Charlie,30,Marketing,62000
3,3,David,45,Engineering,88000
4,4,Eva,25,HR,48000


In [16]:
sample.to_csv('sample.csv')

In [17]:
#We can change the index value also
sample.index = ['I', 'II', 'III', 'IV','V']
sample

#NOTE: To access rows we use index and to access columns we use column values

Unnamed: 0.1,Unnamed: 0,Name,Age,Department,Salary
I,0,Adam,28,HR,50000
II,1,Bob,35,Engineering,75000
III,2,Charlie,30,Marketing,62000
IV,3,David,45,Engineering,88000
V,4,Eva,25,HR,48000


## What is Pandas ?

* Pandas is an open source data analysis library written in python
* It leverages the power and speed of NumPy to make data analysis and preprocessing easy for data scientists
* It provides rich and highly robust data operations

* **Pandas has two types of data structures:**
    * a) **Series**- it's a one dimensional array with indexes, it stores a single column or row of data in a Dataframe.
    * b) **Dataframe**- it's a tabular spreadsheet like structure representing rows each of which contains one or multiple columns

* A one-dimensional array(labeled) capable of holding any type of data - Series
* A two dimensional array(labeled) structure with columns of potentially different types of data - DataFrame

In [20]:
series = pd.Series(np.random.rand(34))
series

0     0.664161
1     0.971376
2     0.327514
3     0.857274
4     0.519605
5     0.886203
6     0.204289
7     0.216484
8     0.270368
9     0.252023
10    0.010084
11    0.354044
12    0.565678
13    0.382716
14    0.159162
15    0.367717
16    0.505607
17    0.248417
18    0.762483
19    0.747347
20    0.682809
21    0.474945
22    0.011023
23    0.452321
24    0.716279
25    0.894894
26    0.989545
27    0.474069
28    0.296364
29    0.700648
30    0.057588
31    0.777798
32    0.181337
33    0.273826
dtype: float64

In [21]:
type(series)

pandas.core.series.Series

In [24]:
# Create a new DataFrame 'newdf' with random values
# np.random.rand(334, 5) creates a 2D array of shape (334 rows, 5 columns) with random float values between 0 and 1
# index = np.arange(334) sets the index of the DataFrame from 0 to 333
newdf = pd.DataFrame(np.random.rand(334, 5), index=np.arange(334))

# Display the first 5 rows of the DataFrame to quickly inspect the data
newdf.head()

# If head() is removed it'll display the entire DataFrame (all 334 rows and 5 columns)

Unnamed: 0,0,1,2,3,4
0,0.118131,0.400991,0.11383,0.23973,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.41382,0.765577


In [25]:
type(newdf)

pandas.core.frame.DataFrame

In [26]:
newdf.describe()

Unnamed: 0,0,1,2,3,4
count,334.0,334.0,334.0,334.0,334.0
mean,0.476088,0.486434,0.503559,0.51673,0.513634
std,0.293767,0.293555,0.294994,0.278173,0.297648
min,0.000253,0.005257,0.002371,0.007796,0.001243
25%,0.206731,0.22969,0.243939,0.273083,0.238909
50%,0.462565,0.471,0.513405,0.528762,0.547059
75%,0.733005,0.754894,0.761536,0.76205,0.764811
max,0.99297,0.999669,0.997351,0.999815,0.995788


In [27]:
newdf.dtypes

0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object

In [28]:
newdf.columns

RangeIndex(start=0, stop=5, step=1)

In [32]:
newdf[0][0] = "Tejas"

In [33]:
newdf.to_numpy()

array([['Tejas', 0.4009907312965395, 0.11382971368415085,
        0.23973010140631534, 0.604338611463053],
       [0.9517989680176662, 0.3575035612187193, 0.40485546410255924,
        0.6121735330156349, 0.6887718904564821],
       [0.1554804594007999, 0.9431045471265374, 0.33314886209327244,
        0.040073983393939305, 0.9135994045313601],
       ...,
       [0.9388129693754709, 0.19728955806470694, 0.01838790784408062,
        0.9527965775225646, 0.30236362668052563],
       [0.08016773569987967, 0.05576237386470495, 0.6978854159957356,
        0.761128822990657, 0.15956595893447223],
       [0.7847014728540442, 0.722876967530773, 0.48637546795449316,
        0.9246334094872267, 0.9338657190386848]], dtype=object)

In [34]:
newdf[0][0] = 0.3

In [35]:
newdf

Unnamed: 0,0,1,2,3,4
0,0.3,0.400991,0.113830,0.239730,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.413820,0.765577
...,...,...,...,...,...
329,0.088859,0.656927,0.382779,0.830375,0.664178
330,0.390199,0.746012,0.066008,0.410332,0.056585
331,0.938813,0.197290,0.018388,0.952797,0.302364
332,0.080168,0.055762,0.697885,0.761129,0.159566


In [36]:
newdf.T #transpose

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,324,325,326,327,328,329,330,331,332,333
0,0.3,0.951799,0.15548,0.873743,0.733886,0.698646,0.473269,0.520496,0.320752,0.12134,...,0.893824,0.529781,0.753336,0.670271,0.739697,0.088859,0.390199,0.938813,0.080168,0.784701
1,0.400991,0.357504,0.943105,0.093346,0.976332,0.193709,0.796621,0.692281,0.278109,0.813215,...,0.304121,0.207286,0.885177,0.326405,0.011411,0.656927,0.746012,0.19729,0.055762,0.722877
2,0.11383,0.404855,0.333149,0.865838,0.859924,0.002371,0.330282,0.392593,0.50815,0.157032,...,0.729044,0.761447,0.706761,0.392449,0.366329,0.382779,0.066008,0.018388,0.697885,0.486375
3,0.23973,0.612174,0.040074,0.508224,0.41382,0.480561,0.017939,0.312053,0.090107,0.054808,...,0.818936,0.295881,0.085228,0.106104,0.489464,0.830375,0.410332,0.952797,0.761129,0.924633
4,0.604339,0.688772,0.913599,0.578296,0.765577,0.73064,0.899305,0.236036,0.029183,0.013272,...,0.023968,0.208559,0.706696,0.815253,0.565053,0.664178,0.056585,0.302364,0.159566,0.933866


In [37]:
newdf.head()

Unnamed: 0,0,1,2,3,4
0,0.3,0.400991,0.11383,0.23973,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.41382,0.765577


In [41]:
# Sort rows by index in descending order
print(newdf.sort_index(axis=0, ascending=False))

# Sort columns
print(newdf.sort_index(axis=1, ascending=False))

            0         1         2         3         4
333  0.784701  0.722877  0.486375  0.924633  0.933866
332  0.080168  0.055762  0.697885  0.761129  0.159566
331  0.938813  0.197290  0.018388  0.952797  0.302364
330  0.390199  0.746012  0.066008  0.410332  0.056585
329  0.088859  0.656927  0.382779  0.830375  0.664178
..        ...       ...       ...       ...       ...
4    0.733886  0.976332  0.859924  0.413820  0.765577
3    0.873743  0.093346  0.865838  0.508224  0.578296
2     0.15548  0.943105  0.333149  0.040074  0.913599
1    0.951799  0.357504  0.404855  0.612174  0.688772
0         0.3  0.400991  0.113830  0.239730  0.604339

[334 rows x 5 columns]
            4         3         2         1         0
0    0.604339  0.239730  0.113830  0.400991       0.3
1    0.688772  0.612174  0.404855  0.357504  0.951799
2    0.913599  0.040074  0.333149  0.943105   0.15548
3    0.578296  0.508224  0.865838  0.093346  0.873743
4    0.765577  0.413820  0.859924  0.976332  0.733886
..  

 When we say `axis=0` or `axis=1`, it doesn’t mean "operate on the axis", it means:
"The function will operate along this axis."

So what does that mean?

| `axis` | Direction      | Means it works **along**  | So operation affects...                        |
| ------ | -------------- | ------------------------- | ---------------------------------------------- |
| 0      | Vertical (↓)   | **rows** (down columns)   | Affects **columns** (e.g., sum of each column) |
| 1      | Horizontal (→) | **columns** (across rows) | Affects **rows** (e.g., sum of each row)       |



BUT — for `sort_index()`, it’s different:


| Function       | `axis=0` means                      | `axis=1` means                      |
| -------------- | ----------------------------------- | ----------------------------------- |
| `np.sum()`     | Sum **along rows**, affects columns | Sum **along columns**, affects rows |
| `sort_index()` | Sort by **row index**               | Sort by **column index**            |


In [42]:
newdf[0]

0           0.3
1      0.951799
2       0.15548
3      0.873743
4      0.733886
         ...   
329    0.088859
330    0.390199
331    0.938813
332    0.080168
333    0.784701
Name: 0, Length: 334, dtype: object

In [43]:
# it's a series
type(newdf[0])

pandas.core.series.Series

In [44]:
#view vs copy
newdf2 = newdf #newdf2 is a view of newdf

In [45]:
newdf2[0][0] = 9783

In [46]:
newdf

Unnamed: 0,0,1,2,3,4
0,9783,0.400991,0.113830,0.239730,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.413820,0.765577
...,...,...,...,...,...
329,0.088859,0.656927,0.382779,0.830375,0.664178
330,0.390199,0.746012,0.066008,0.410332,0.056585
331,0.938813,0.197290,0.018388,0.952797,0.302364
332,0.080168,0.055762,0.697885,0.761129,0.159566


In [47]:
#In copy changes won't get saved for any value
newdf2 = newdf.copy()


In [48]:
newdf2[0][0] = 123

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  newdf2[0][0] = 123
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newdf2[0][0] = 123


In [49]:
newdf #no changes

Unnamed: 0,0,1,2,3,4
0,9783,0.400991,0.113830,0.239730,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.413820,0.765577
...,...,...,...,...,...
329,0.088859,0.656927,0.382779,0.830375,0.664178
330,0.390199,0.746012,0.066008,0.410332,0.056585
331,0.938813,0.197290,0.018388,0.952797,0.302364
332,0.080168,0.055762,0.697885,0.761129,0.159566


In [50]:
newdf.loc[0,0] = 654
newdf.head(2)

Unnamed: 0,0,1,2,3,4
0,654.0,0.400991,0.11383,0.23973,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772


In [51]:
newdf.columns = list("ABCDE")

In [52]:
newdf

Unnamed: 0,A,B,C,D,E
0,654,0.400991,0.113830,0.239730,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.413820,0.765577
...,...,...,...,...,...
329,0.088859,0.656927,0.382779,0.830375,0.664178
330,0.390199,0.746012,0.066008,0.410332,0.056585
331,0.938813,0.197290,0.018388,0.952797,0.302364
332,0.080168,0.055762,0.697885,0.761129,0.159566


In [53]:
newdf.loc[0, 'A'] = 65445

In [54]:
newdf.head()

Unnamed: 0,A,B,C,D,E
0,65445.0,0.400991,0.11383,0.23973,0.604339
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.41382,0.765577


In [58]:
newdf = newdf.drop(0, axis=0) #here axis = 0 means row-wise

In [59]:
newdf.head()

Unnamed: 0,A,B,C,D,E
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.41382,0.765577
5,0.698646,0.193709,0.002371,0.480561,0.73064


In [60]:
newdf.loc[[1,2], ['C','D']]

Unnamed: 0,C,D
1,0.404855,0.612174
2,0.333149,0.040074


In [61]:
newdf.loc[[1,2], :]

Unnamed: 0,A,B,C,D,E
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599


In [63]:
newdf.loc[(newdf['A'] < 0.3) & (newdf['C'] > 0.1)] 

Unnamed: 0,A,B,C,D,E
2,0.15548,0.943105,0.333149,0.040074,0.913599
9,0.12134,0.813215,0.157032,0.054808,0.013272
10,0.117213,0.530396,0.831853,0.630483,0.995788
13,0.223316,0.951445,0.711523,0.052423,0.965667
14,0.263366,0.634214,0.676966,0.998299,0.109250
...,...,...,...,...,...
315,0.058152,0.737167,0.515949,0.438084,0.885963
319,0.133463,0.774319,0.463045,0.559692,0.776607
323,0.019741,0.760683,0.738372,0.243963,0.374280
329,0.088859,0.656927,0.382779,0.830375,0.664178


In [64]:
newdf.head(2)

Unnamed: 0,A,B,C,D,E
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599


In [65]:
newdf.iloc[0, 4] # 1st row, 4th column

0.6887718904564821

In [67]:
 newdf.iloc[[0,5],[1,2]]

Unnamed: 0,B,C
1,0.357504,0.404855
6,0.796621,0.330282


In [68]:
newdf.head(3)

Unnamed: 0,A,B,C,D,E
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296


In [73]:
newdf.drop(['A'], axis=1) #drop column 'A' 

Unnamed: 0,B,C,D,E
1,0.357504,0.404855,0.612174,0.688772
2,0.943105,0.333149,0.040074,0.913599
3,0.093346,0.865838,0.508224,0.578296
4,0.976332,0.859924,0.413820,0.765577
5,0.193709,0.002371,0.480561,0.730640
...,...,...,...,...
329,0.656927,0.382779,0.830375,0.664178
330,0.746012,0.066008,0.410332,0.056585
331,0.197290,0.018388,0.952797,0.302364
332,0.055762,0.697885,0.761129,0.159566


Here this is not droping in our newdf these operations are done only on copy of newdf no changes are done on existing newdf

In [75]:
newdf.head()

Unnamed: 0,A,B,C,D,E
1,0.951799,0.357504,0.404855,0.612174,0.688772
2,0.15548,0.943105,0.333149,0.040074,0.913599
3,0.873743,0.093346,0.865838,0.508224,0.578296
4,0.733886,0.976332,0.859924,0.41382,0.765577
5,0.698646,0.193709,0.002371,0.480561,0.73064


So to make changes in the original newdf then use inplace = True

In [76]:
newdf.drop(['E'], axis=1, inplace=True)

In [77]:
newdf.head()

Unnamed: 0,A,B,C,D
1,0.951799,0.357504,0.404855,0.612174
2,0.15548,0.943105,0.333149,0.040074
3,0.873743,0.093346,0.865838,0.508224
4,0.733886,0.976332,0.859924,0.41382
5,0.698646,0.193709,0.002371,0.480561


In [79]:
#To reset index and not return extra column for index:
newdf.reset_index(drop=True, inplace=True)

In [80]:
newdf.head()

Unnamed: 0,A,B,C,D
0,0.951799,0.357504,0.404855,0.612174
1,0.15548,0.943105,0.333149,0.040074
2,0.873743,0.093346,0.865838,0.508224
3,0.733886,0.976332,0.859924,0.41382
4,0.698646,0.193709,0.002371,0.480561


In [82]:
newdf['B'] = None #Not a good practice always use 'loc' for this

In [83]:
newdf

Unnamed: 0,A,B,C,D
0,0.951799,,0.404855,0.612174
1,0.15548,,0.333149,0.040074
2,0.873743,,0.865838,0.508224
3,0.733886,,0.859924,0.413820
4,0.698646,,0.002371,0.480561
...,...,...,...,...
328,0.088859,,0.382779,0.830375
329,0.390199,,0.066008,0.410332
330,0.938813,,0.018388,0.952797
331,0.080168,,0.697885,0.761129


In [84]:
newdf.loc[:, ['B']] = 56

In [85]:
newdf.head()

Unnamed: 0,A,B,C,D
0,0.951799,56,0.404855,0.612174
1,0.15548,56,0.333149,0.040074
2,0.873743,56,0.865838,0.508224
3,0.733886,56,0.859924,0.41382
4,0.698646,56,0.002371,0.480561


In [7]:
df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'],
                   "toy": [np.nan, 'Batmobile', 'Bullwhip'],
                   "born": [pd.NaT, pd.Timestamp("1940-04-25"),
                            pd.NaT]})

In [8]:
df.head()

Unnamed: 0,name,toy,born
0,Alfred,,NaT
1,Batman,Batmobile,1940-04-25
2,Catwoman,Bullwhip,NaT


In [9]:
df.dropna()

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25


In [10]:
dupe_df  = pd.DataFrame({"name": ['Alfred', 'Batman', 'Alfred'],
                   "toy": [np.nan, 'Batmobile', 'Bullwhip'],
                   "born": [pd.NaT, pd.Timestamp("1940-04-25"),
                            pd.NaT]})

In [12]:
#remove duplicates form 'name'
dupe_df.drop_duplicates(subset=['name'])

Unnamed: 0,name,toy,born
0,Alfred,,NaT
1,Batman,Batmobile,1940-04-25


In [14]:
#remove all duplicates from 'name'
dupe_df.drop_duplicates(subset=['name'], keep=False)

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25


In [16]:
#To keep last duplicate of 'name'
dupe_df.drop_duplicates(subset=['name'], keep='last')

Unnamed: 0,name,toy,born
1,Batman,Batmobile,1940-04-25
2,Alfred,Bullwhip,NaT


In [17]:
df.shape

(3, 3)

In [18]:
df.info

<bound method DataFrame.info of        name        toy       born
0    Alfred        NaN        NaT
1    Batman  Batmobile 1940-04-25
2  Catwoman   Bullwhip        NaT>

In [21]:
df['toy'].value_counts(dropna=False) # include NaN

toy
NaN          1
Batmobile    1
Bullwhip     1
Name: count, dtype: int64

In [22]:
df['toy'].value_counts(dropna=True) # without including NaN

toy
Batmobile    1
Bullwhip     1
Name: count, dtype: int64

In [23]:
df.notnull()

Unnamed: 0,name,toy,born
0,True,False,False
1,True,True,True
2,True,True,False


### create a dataframe which contains only integers with 3 rows and 2 columns
#### run following dataframe methods on them:

df.describe()
df.mean()
df.corr()
df.count()
df.max()
df.min()
df.median()
df.std()