# Understanding how to describe data using pandas

In [1]:
import pandas as pd

In [2]:
# Column-oriented sample records: every key is a column label and each list
# carries one entry per person (seven entries per column).
data = dict(
    [
        ("Name", ["Ram", "Shyam", "Ghansyam", "Kam", "Aaram", "Sharam", "Gharam"]),
        ("Age", [12, 13, 14, 15, 16, 17, 18]),
        ("City", ["Delhi", "Mumbai", "Kolkata", "Chennai", "Bangalore", "Hyderabad", "Pune"]),
        ("Salary", [1000, 2000, 3000, 4000, 5000, 6000, 7000]),
        ("Performance Score", [56, 85, 95, 62, 75, 88, 90]),
    ]
)

In [3]:
# Turn the column dictionary into a table: the dict keys become the column
# labels and the rows receive the default integer index starting at 0.
df = pd.DataFrame.from_dict(data)
print(df)

       Name  Age       City  Salary  Performance Score
0       Ram   12      Delhi    1000                 56
1     Shyam   13     Mumbai    2000                 85
2  Ghansyam   14    Kolkata    3000                 95
3       Kam   15    Chennai    4000                 62
4     Aaram   16  Bangalore    5000                 75
5    Sharam   17  Hyderabad    6000                 88
6    Gharam   18       Pune    7000                 90


In [4]:
# describe() reports count, mean, std, min, quartiles and max for the
# numeric columns; the banner and the table are emitted on separate lines.
print(
    "*****    Displaying the description of data  *****",
    df.describe(),
    sep="\n",
)

*****    Displaying the description of data  *****
             Age       Salary  Performance Score
count   7.000000     7.000000           7.000000
mean   15.000000  4000.000000          78.714286
std     2.160247  2160.246899          14.874075
min    12.000000  1000.000000          56.000000
25%    13.500000  2500.000000          68.500000
50%    15.000000  4000.000000          85.000000
75%    16.500000  5500.000000          89.000000
max    18.000000  7000.000000          95.000000


### Getting more detail about the data

In [5]:
# shape is a (rows, columns) tuple; columns is the Index of column labels.
# print() joins its arguments with a single space, matching the labels below.
print('Shape:', df.shape)
print('Column Names:', df.columns)

Shape: (7, 5)
Column Names: Index(['Name', 'Age', 'City', 'Salary', 'Performance Score'], dtype='object')


### Next, we look at how to:
-   Select specific column
-   Filter rows
-   Combine multiple conditions

### How to select a single column from a DataFrame

In [6]:
# Indexing with one column label yields a Series rather than a DataFrame.
print(
    'Names (Single column return series)',
    df['Name'],
    sep='\n',
)

Names (Single column return series)
0         Ram
1       Shyam
2    Ghansyam
3         Kam
4       Aaram
5      Sharam
6      Gharam
Name: Name, dtype: object


### How to select multiple columns in pandas 

In [7]:
# Indexing with a list of labels keeps just those columns, in the listed order,
# and returns a DataFrame.
wanted_columns = ['Name', 'Salary']
subset = df[wanted_columns]
print('Subset with name and salary', subset, sep='\n')

Subset with name and salary
       Name  Salary
0       Ram    1000
1     Shyam    2000
2  Ghansyam    3000
3       Kam    4000
4     Aaram    5000
5    Sharam    6000
6    Gharam    7000


### How to filter rows

#### On a single condition

In [8]:
# Build a boolean mask (one True/False per row), then keep the rows where it is True.
salary_above_5000 = df['Salary'] > 5000
high_salary = df[salary_above_5000]
print('Employees with salary more than 5000', high_salary, sep='\n')

Employees with salary more than 5000
     Name  Age       City  Salary  Performance Score
5  Sharam   17  Hyderabad    6000                 88
6  Gharam   18       Pune    7000                 90


#### On multiple conditions

-   Using the AND (`&`) condition in pandas filtering

In [9]:
# Each comparison yields its own boolean mask; `&` combines them element-wise,
# so a row is kept only when both conditions hold.
salary_above_3000 = df['Salary'] > 3000
score_above_70 = df['Performance Score'] > 70
multiple_conditions_and = df[salary_above_3000 & score_above_70]
print(
    'Employees with salary more than 3000 and performance score more than 70',
    multiple_conditions_and,
    sep='\n',
)

Employees with salary more than 3000 and performance score more than 70
     Name  Age       City  Salary  Performance Score
4   Aaram   16  Bangalore    5000                 75
5  Sharam   17  Hyderabad    6000                 88
6  Gharam   18       Pune    7000                 90


-   Using the OR (`|`) condition in pandas filtering

In [10]:
# Each comparison yields its own boolean mask; `|` combines them element-wise,
# so a row is kept when at least one of the conditions holds.
age_above_15 = df['Age'] > 15
score_above_75 = df['Performance Score'] > 75
multiple_conditions_or = df[age_above_15 | score_above_75]
print(
    'Employees with age more than 15 or performance score more than 75',
    multiple_conditions_or,
    sep='\n',
)

Employees with age more than 15 or performance score more than 75
       Name  Age       City  Salary  Performance Score
1     Shyam   13     Mumbai    2000                 85
2  Ghansyam   14    Kolkata    3000                 95
4     Aaram   16  Bangalore    5000                 75
5    Sharam   17  Hyderabad    6000                 88
6    Gharam   18       Pune    7000                 90
