# pandas is a popular Python-based data analysis toolkit. Import it with `import pandas as pd`.

In [None]:
import pandas as pd
import numpy as np

### Creating a dictionary

In [7]:
# Sample records, one tuple per person: (name, age, city). The inner zip
# transposes the rows into columns, and the outer zip pairs each column with
# its label, giving a column-oriented mapping (column name -> list of values).
# NOTE(review): the variable name `dict` shadows Python's built-in `dict` type
# for the rest of the session; it is kept because a later cell refers to it.
dict = {
    column: list(values)
    for column, values in zip(
        ("name", "age", "city"),
        zip(
            ("john wick", 38, "New York"),
            ("john cena", 48, "Las Angeles"),
            ("marvick", 55, "california"),
            ("logan", 178, "Michigan"),
        ),
    )
}

### Convert a dictionary to a pandas DataFrame using `df = pd.DataFrame(...)`

In [3]:
# Each key of the mapping becomes a column label; each list becomes that column's values.
df = pd.DataFrame(data=dict)

### df.head() --> Return the first n rows

In [11]:
# Show only the first 3 of the frame's 4 rows.
df.head(3)

Unnamed: 0,name,age,city
0,john wick,38,New York
1,john cena,48,Las Angeles
2,marvick,55,california


### df.tail() --> Return the last n rows

In [12]:
# Show only the last 3 rows; they keep their original index labels (1, 2, 3).
df.tail(3)

Unnamed: 0,name,age,city
1,john cena,48,Las Angeles
2,marvick,55,california
3,logan,178,Michigan


### The pandas `DataFrame.to_csv()` method writes a DataFrame out as CSV data

In [13]:
# Write df to superheros.csv (relative path, so it lands in the current working
# directory); index=False leaves the row-index column out of the file.
df.to_csv("superheros.csv", index=False)

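### The `describe()` method returns summary statistics of the data in the DataFrame. If the DataFrame contains numerical data, the summary includes the count, mean, standard deviation, minimum, 25%/50%/75% percentiles, and maximum of each numeric column.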

In [14]:
# With default arguments only the numeric column (age) is summarised;
# the string columns (name, city) are left out of the result.
df.describe()

Unnamed: 0,age
count,4.0
mean,79.75
std,65.870454
min,38.0
25%,45.5
50%,51.5
75%,85.75
max,178.0


### read_csv is used to load a CSV file as a pandas dataframe

In [39]:
# Load the Titanic passenger data into a DataFrame. The path is relative, so
# titanic.csv must be present in the current working directory.
titanic = pd.read_csv("titanic.csv")

In [40]:
# Peek at the first 4 rows to see which columns were loaded.
titanic.head(4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


### To make all characters in the names lowercase, use `titanic["Name"].str.lower()`

In [83]:
# Store the lowercased names back on the frame so the change persists for the
# cells that follow: calling .str.lower() on its own only returns a new Series
# and leaves titanic untouched. Lowercasing is idempotent, so re-running this
# cell is safe.
titanic["Name"] = titanic["Name"].str.lower()

# Display the updated column as the cell output.
titanic["Name"]

0                                braund, mr. owen harris
1      cumings, mrs. john bradley (florence briggs th...
2                                 heikkinen, miss. laina
3           futrelle, mrs. jacques heath (lily may peel)
4                               allen, mr. william henry
                             ...                        
886                                montvila, rev. juozas
887                         graham, miss. margaret edith
888             johnston, miss. catherine helen "carrie"
889                                behr, mr. karl howell
890                                  dooley, mr. patrick
Name: Name, Length: 891, dtype: object

### How to select a subset of a DataFrame

In [58]:
# To select a single column, use square brackets [] with the name of the column of interest.
# The result is a one-dimensional pandas Series rather than a DataFrame.

ages = titanic["Age"]

In [59]:
# head() with no argument shows the first 5 values of the Series.
ages.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: Age, dtype: float64

### To select multiple columns, use a list of column names within the selection brackets [].

In [64]:
# The inner brackets are a Python list of column names; the result is a
# DataFrame that contains just those columns, in the order listed.
age_sex = titanic[["Age", "Sex"]]

In [65]:
# Preview the first 5 rows of the two-column subset.
age_sex.head()

Unnamed: 0,Age,Sex
0,22.0,male
1,38.0,female
2,26.0,female
3,35.0,female
4,35.0,male


### Create a new column Surname that contains the surname of the passengers by extracting the part before the comma.

In [45]:
# Split every name on commas and keep the first piece, i.e. the text that
# precedes the first comma, as the passenger's surname.
titanic["Surname"] = (
    titanic["Name"]
    .str.split(",")
    .str[0]
)

In [46]:
# Check the result: the new Surname column appears as the last column.
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Surname
0,1,0,3,"braund, mr. owen harris",male,22.0,1,0,A/5 21171,7.25,,S,braund
1,2,1,1,"cumings, mrs. john bradley (florence briggs th...",female,38.0,1,0,PC 17599,71.2833,C85,C,cumings
2,3,1,3,"heikkinen, miss. laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,heikkinen
3,4,1,1,"futrelle, mrs. jacques heath (lily may peel)",female,35.0,1,0,113803,53.1,C123,S,futrelle
4,5,0,3,"allen, mr. william henry",male,35.0,0,0,373450,8.05,,S,allen


### Creating a new DataFrame using NumPy

In [72]:
# Seed the generator so the random values (and every output derived from them)
# are identical on each Restart & Run All.
rng = np.random.default_rng(seed=42)

# 200 rows x 5 columns of uniform floats in [0, 1). Rows are labelled 0..199;
# columns get the default integer labels 0..4.
new_df = pd.DataFrame(rng.random((200, 5)), index=np.arange(200))

In [73]:
# Preview the first 5 rows; the columns carry the default integer labels 0..4.
new_df.head()

Unnamed: 0,0,1,2,3,4
0,0.569567,0.165447,0.971081,0.255128,0.166589
1,0.718493,0.293243,0.366101,0.165561,0.007704
2,0.09142,0.430994,0.428845,0.883441,0.308296
3,0.38369,0.399853,0.68988,0.012218,0.107026
4,0.646413,0.910095,0.08784,0.86951,0.160121


## Sort object by labels (along an axis)
### `pandas.DataFrame.sort_index`: `axis=0` sorts by row labels, `axis=1` sorts by column labels.

In [78]:
# Sort the column labels in descending order (4, 3, 2, 1, 0). The sorted frame
# is only displayed here; it is not assigned, so new_df itself is unchanged.
new_df.sort_index(axis=1, ascending=False)

Unnamed: 0,4,3,2,1,0
0,0.166589,0.255128,0.971081,0.165447,0.569567
1,0.007704,0.165561,0.366101,0.293243,0.718493
2,0.308296,0.883441,0.428845,0.430994,0.091420
3,0.107026,0.012218,0.689880,0.399853,0.383690
4,0.160121,0.869510,0.087840,0.910095,0.646413
...,...,...,...,...,...
195,0.741000,0.050178,0.697410,0.545251,0.790067
196,0.064214,0.094116,0.515442,0.187893,0.432972
197,0.295010,0.619418,0.232621,0.797384,0.789612
198,0.742466,0.994673,0.413103,0.468044,0.169835


### Rename Specific Columns
### Use the `df.rename()` method and pass a mapping of the columns to be renamed. Not all the columns have to be renamed:

In [81]:
# rename() returns a new DataFrame; rebinding the name instead of passing
# inplace=True avoids mutating the existing object and keeps the call chainable.
# Column labels that are not keys of the mapping are left as they are.
# NOTE(review): this maps column 4 -> 'A' ... 0 -> 'E', so starting from the
# labels 0..4 the resulting column order is E, D, C, B, A. The recorded output
# shows A..E instead -- confirm which mapping is intended.
new_df = new_df.rename(columns={4: 'A', 3: 'B', 2: 'C', 1: 'D', 0: 'E'})

In [82]:
# Display the frame to check the renamed column labels (pandas abbreviates the
# middle rows of a long frame with "...").
new_df

Unnamed: 0,A,B,C,D,E
0,0.569567,0.165447,0.971081,0.255128,0.166589
1,0.718493,0.293243,0.366101,0.165561,0.007704
2,0.091420,0.430994,0.428845,0.883441,0.308296
3,0.383690,0.399853,0.689880,0.012218,0.107026
4,0.646413,0.910095,0.087840,0.869510,0.160121
...,...,...,...,...,...
195,0.790067,0.545251,0.697410,0.050178,0.741000
196,0.432972,0.187893,0.515442,0.094116,0.064214
197,0.789612,0.797384,0.232621,0.619418,0.295010
198,0.169835,0.468044,0.413103,0.994673,0.742466
