In [2]:
import pandas as pd

# Loading data
[Pandas read options](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html)

In [3]:
# Load the CSV, using its 'Unnamed: 0' column as the row index.
df = pd.read_csv('data.csv', index_col='Unnamed: 0')

# Preview the first row to check that the file was parsed as expected.
df.head(1)

Unnamed: 0,source_id,source_name,author,title,description,url,url_to_image,published_at,content,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
0,reuters,Reuters,Reuters Editorial,NTSB says Autopilot engaged in 2018 California...,The National Transportation Safety Board said ...,https://www.reuters.com/article/us-tesla-crash...,https://s4.reutersmedia.net/resources/r/?m=02&...,2019-09-03T16:22:20Z,WASHINGTON (Reuters) - The National Transporta...,0.0,0.0,0.0,2528.0,0.0


# Display parameters
[pandas.set_option parameters](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.set_option.html?highlight=set_option#pandas-set-option)

In [4]:
# Cap how many columns and rows pandas renders when displaying a frame.
# set_option accepts several (option, value) pairs in a single call.
pd.set_option(
    'display.max_columns', 20,
    'display.max_rows', 20,
)

# Dataframe
[Dataframe parameters](https://pandas.pydata.org/pandas-docs/stable/reference/frame.html)

## Creating a dataframe

In [5]:
# Column-oriented sample data: each key becomes a column, each list its values.
people = dict(
    First=["Fred", "Corey", "John"],
    Last=["Teste", "teste", "teste"],
    Email=["123@teste.com", "123@teste.com", "123@teste.com"],
)

# Note: these values are strings, so the resulting columns are not numeric.
numbers = dict(
    A=["1", "2", "3"],
    B=["2", "2", "2"],
    C=["5", "5", "4"],
)

# Build one frame per dictionary.
df_people = pd.DataFrame(data=people)
df_num = pd.DataFrame(data=numbers)

# Plain-text dump of both frames, numbers first.
print(df_num, df_people, sep="\n")

   A  B  C
0  1  2  5
1  2  2  5
2  3  2  4
   First   Last          Email
0   Fred  Teste  123@teste.com
1  Corey  teste  123@teste.com
2   John  teste  123@teste.com


## Information gathering

### Full size

In [6]:
# Total number of elements in the frame (rows x columns).
df_people.size

9

### Dimensions

In [7]:
# Dimensions as a (row count, column count) tuple.
df_people.shape

(3, 3)

### Labels

In [30]:
# Two-item list: the row index labels first, then the column labels.
df_people.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['First', 'Last', 'Email'], dtype='object')]

### Unique Values

In [9]:
# Print the distinct values found in each column, one array per line.
for column in ("A", "B", "C"):
    print(df_num[column].unique())

# Display the frame itself so the arrays above can be compared with the data.
df_num.head()

['1' '2' '3']
['2']
['5' '4']


Unnamed: 0,A,B,C
0,1,2,5
1,2,2,5
2,3,2,4


### Count

In [65]:
# Number of non-null entries in each column.
df_num.count()

A    3
B    3
C    3
dtype: int64

### Describe

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
count,10435.0,10319.0,10319.0,10319.0,10319.0
mean,0.122089,381.39529,124.032949,196.236263,0.011629
std,0.327404,4433.344792,965.351188,1020.680229,0.268276
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,1.0,0.0
50%,0.0,1.0,0.0,8.0,0.0
75%,0.0,43.0,12.0,47.5,0.0
max,1.0,354132.0,48490.0,39422.0,15.0


### Show NaN values

In [77]:
# Boolean mask of missing values (True where a cell is NaN); show only the first five rows.
df.isna().head()

Unnamed: 0,source_id,source_name,author,title,description,url,url_to_image,published_at,content,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,True,True,True,True
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,True,False,False,False,False,False


## Filtering

### Sorting

In [28]:
# Show the frame in its original order first.
print(df_num)

# Then return a copy ordered by column 'A', largest value first.
# sort_values does not modify df_num itself.
df_num.sort_values('A', ascending=False)

   A  B  C
0  1  2  5
1  2  2  5
2  3  2  4


Unnamed: 0,A,B,C
2,3,2,4
1,2,2,5
0,1,2,5


In [40]:
# Preview the first two rows of the loaded dataset.
df.head(2)

Unnamed: 0,source_id,source_name,author,title,description,url,url_to_image,published_at,content,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
0,reuters,Reuters,Reuters Editorial,NTSB says Autopilot engaged in 2018 California...,The National Transportation Safety Board said ...,https://www.reuters.com/article/us-tesla-crash...,https://s4.reutersmedia.net/resources/r/?m=02&...,2019-09-03T16:22:20Z,WASHINGTON (Reuters) - The National Transporta...,0.0,0.0,0.0,2528.0,0.0
1,the-irish-times,The Irish Times,Eoin Burke-Kennedy,Unemployment falls to post-crash low of 5.2%,Latest monthly figures reflect continued growt...,https://www.irishtimes.com/business/economy/un...,https://www.irishtimes.com/image-creator/?id=1...,2019-09-03T10:32:28Z,The States jobless rate fell to 5.2 per cent l...,0.0,6.0,10.0,2.0,0.0


### Find a value

In [79]:
# Keep only the cells equal to "reuters"; every other cell becomes NaN.
# The result has the same shape as df.
MyVal = df.where(df.eq("reuters"))

# First five rows (head() defaults to 5).
MyVal.head()

Unnamed: 0,source_id,source_name,author,title,description,url,url_to_image,published_at,content,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
0,reuters,,,,,,,,,,,,,
1,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,


### Dropping null values

#### [Dropna documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html)

#### Dropna all values

In [116]:
# Drop every row that has at least one missing value.
# axis=0 and how='any' are the defaults, spelled out here for clarity.
RemNull = df.dropna(axis=0, how='any')

# Preview the first two surviving rows.
RemNull.head(n=2)

Unnamed: 0,source_id,source_name,author,title,description,url,url_to_image,published_at,content,top_article,engagement_reaction_count,engagement_comment_count,engagement_share_count,engagement_comment_plugin_count
0,reuters,Reuters,Reuters Editorial,NTSB says Autopilot engaged in 2018 California...,The National Transportation Safety Board said ...,https://www.reuters.com/article/us-tesla-crash...,https://s4.reutersmedia.net/resources/r/?m=02&...,2019-09-03T16:22:20Z,WASHINGTON (Reuters) - The National Transporta...,0.0,0.0,0.0,2528.0,0.0
1,the-irish-times,The Irish Times,Eoin Burke-Kennedy,Unemployment falls to post-crash low of 5.2%,Latest monthly figures reflect continued growt...,https://www.irishtimes.com/business/economy/un...,https://www.irishtimes.com/image-creator/?id=1...,2019-09-03T10:32:28Z,The States jobless rate fell to 5.2 per cent l...,0.0,6.0,10.0,2.0,0.0


#### Dropna specific columns

In [118]:
# Take the source_id column of the masked frame and drop its NaN entries,
# leaving only the rows where the value was found.
RemNull = MyVal["source_id"].dropna()

# First five matches (head() defaults to 5).
RemNull.head()

0     reuters
6     reuters
7     reuters
13    reuters
24    reuters
Name: source_id, dtype: object