[Reference](https://blog.devgenius.io/exploring-python-pandas-bdc7ff61a8f8)

In [1]:
# Pandas and NumPy are brought in under their conventional short
# aliases (pd / np) so later calls stay compact.
import pandas as pd
import numpy as np

# Values for a one-dimensional Series; np.nan marks a missing entry.
series_values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(series_values)

In [2]:
# Pandas under its usual alias.
import pandas as pd

# Column-oriented input: every key becomes one DataFrame column.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'San Francisco', 'Los Angeles'],
)

df = pd.DataFrame(data)

In [3]:
# Select the 'City' column with bracket (key) notation, keep it in a
# variable, and display it as the cell result.
city_column_bracket = df['City']
city_column_bracket

0         New York
1    San Francisco
2      Los Angeles
Name: City, dtype: object

In [4]:
# Select the same 'City' column with dot (attribute) notation, keep it in a
# variable, and display it as the cell result.
city_column_dot = df.City
city_column_dot

0         New York
1    San Francisco
2      Los Angeles
Name: City, dtype: object

In [5]:
# Compare the bracket-notation and dot-notation selections; the cell
# evaluates to a single boolean telling whether the two Series match.
city_column_bracket.equals(city_column_dot)

True

In [6]:
# Build a new 'City_Name' column by joining 'City' and 'Name' element-wise
# with the + operator, separated by a single space.
df['City_Name'] = df['City'] + ' ' + df['Name']
# Display the frame including the new column.
df

Unnamed: 0,Name,Age,City,City_Name
0,Alice,25,New York,New York Alice
1,Bob,30,San Francisco,San Francisco Bob
2,Charlie,35,Los Angeles,Los Angeles Charlie


In [7]:
# Put 'City' and 'Name' side by side with pd.concat, then collapse every
# row into one space-separated string.
city_and_name = pd.concat([df['City'], df['Name']], axis=1)
df['City_Name'] = city_and_name.apply(' '.join, axis=1)

# Show the frame with the rebuilt 'City_Name' column
df

Unnamed: 0,Name,Age,City,City_Name
0,Alice,25,New York,New York Alice
1,Bob,30,San Francisco,San Francisco Bob
2,Charlie,35,Los Angeles,Los Angeles Charlie


In [8]:
# Boolean mask marking the rows whose 'Age' exceeds 26
is_older_than_26 = df['Age'] > 26

# Keep only the rows selected by the mask
filtered_df = df.loc[is_older_than_26]

# Print the filtered rows as plain text
print(filtered_df)

      Name  Age           City            City_Name
1      Bob   30  San Francisco    San Francisco Bob
2  Charlie   35    Los Angeles  Los Angeles Charlie


In [9]:
# pandas under its usual "pd" alias
import pandas as pd

# Remote location of the Titanic passenger CSV
titanic_csv_url = "https://raw.githubusercontent.com/aimlresearcher/datasets/main/titanic/titanic_data.csv"

# Download and parse the CSV into a DataFrame
df = pd.read_csv(titanic_csv_url)

# Show the loaded DataFrame as the cell result
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [10]:
# Displaying the dimensions of the DataFrame as a (rows, columns) tuple
df.shape

(891, 12)

In [11]:
# Preview the dataset: the first 3 rows only
df.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [12]:
# Preview the dataset: the last 3 rows only
df.tail(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [13]:
# List the column labels of the DataFrame
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [14]:
# Show the data type pandas assigned to each column
df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [15]:
# Label-based selection: rows labelled up to and including 2 (three rows
# here), restricted to the 'Name' and 'Sex' columns.
df.loc[:2, ['Name', 'Sex']]

Unnamed: 0,Name,Sex
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female


In [16]:
# Position-based selection: row positions 0-2 and column positions 3-4
# (end-exclusive slices), which are 'Name' and 'Sex' in this frame.
df.iloc[:3, 3:5]

Unnamed: 0,Name,Sex
0,"Braund, Mr. Owen Harris",male
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female
2,"Heikkinen, Miss. Laina",female


In [18]:
# Write the DataFrame to a CSV file in the working directory;
# index=False leaves the row index out of the file.
df.to_csv("modified_titanic_dataset.csv", index=False)

In [19]:
# Cap how many rows pandas renders when displaying a frame.
# NOTE: set_option changes a global display setting, so it stays in effect
# for later cells too (their recorded outputs appear truncated accordingly).
pd.set_option('display.max_rows', 2)

# Display the DataFrame under the new row limit
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [20]:
# Cap how many columns pandas renders when displaying a frame.
# NOTE: set_option changes a global display setting, so it stays in effect
# for later cells too (their recorded outputs appear truncated accordingly).
pd.set_option('display.max_columns', 2)

# Display the DataFrame under the new column limit
df

Unnamed: 0,PassengerId,...,Embarked
0,1,...,S
...,...,...,...
890,891,...,Q


In [21]:
import pandas as pd

# Two float columns with many decimal places to demonstrate formatting
df = pd.DataFrame(
    {
        "Column1": [123456.98765, 256987.125698],
        "Column2": [546987.3256, 639854.4521369],
    }
)

# Render floats with a thousands separator and two decimals.
# This is a global display option and only affects how values are shown,
# not the stored numbers.
pd.set_option('display.float_format', '{:,.2f}'.format)
df

Unnamed: 0,Column1,Column2
0,123456.99,546987.33
1,256987.13,639854.45


In [22]:
import pandas as pd

# One column per Python type: str, int, float, bool
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 22, 35],
    Height=[165.5, 180.0, 170.2, 160.8],
    IsStudent=[True, False, True, False],
)

df = pd.DataFrame(data)

# Inferred dtypes before any conversion:
#   Name          object
#   Age            int64
#   Height       float64
#   IsStudent       bool
#   dtype: object
df.dtypes

# Recast three columns in a single astype call:
#   Age       -> float
#   Height    -> integer (fractional part is dropped)
#   IsStudent -> string
df = df.astype({'Age': 'float64', 'Height': 'int64', 'IsStudent': 'str'})

In [23]:
import pandas as pd
import numpy as np

# Seeded generator: the "random" values are identical on every run, so
# Restart & Run All reproduces the same frame.
rng = np.random.default_rng(42)

# Creating a simple dataset with a date column:
#   Date  - five consecutive days starting 2023-11-14
#   Value - five integers drawn from 1..99 (upper bound 100 is exclusive)
data = {
    'Date': pd.date_range(start='2023-11-14', periods=5, freq='D'),
    'Value': rng.integers(1, 100, size=5)
}

df = pd.DataFrame(data)

# Display the original DataFrame
print(df)

         Date  Value
0  2023-11-14     72
..        ...    ...
4  2023-11-18     92

[5 rows x 2 columns]


In [24]:
# Extract the day-of-month component from each date via the .dt accessor
df['Date'].dt.day

0    14
     ..
4    18
Name: Date, Length: 5, dtype: int32

In [25]:
# Extract the month number from each date via the .dt accessor
df['Date'].dt.month

0    11
     ..
4    11
Name: Date, Length: 5, dtype: int32

In [26]:
# Extract the month number from each date via the .dt accessor.
# NOTE(review): this cell repeats the preceding `.dt.month` cell verbatim;
# presumably a different component (e.g. `.dt.year`) was intended — confirm.
df['Date'].dt.month

0    11
     ..
4    11
Name: Date, Length: 5, dtype: int32

In [28]:
# Only the datetime class is needed from the datetime module
from datetime import datetime

# Snapshot of the current local date and time
current_datetime = datetime.now()

# Render the snapshot as text. The pattern "%Y-%m-%d %H:%M:%S" means
# Year-Month-Day Hour:Minute:Second; format() on a datetime applies the
# same directives as strftime.
formatted_datetime = format(current_datetime, "%Y-%m-%d %H:%M:%S")

print("Current Date and Time:", formatted_datetime)

Current Date and Time: 2024-05-15 00:53:17


In [29]:
import pandas as pd
from datetime import datetime

# Small dataset whose timestamps start out as plain strings
data = dict(
    start_time=['2023-01-01 12:00:00', '2023-01-01 15:30:00', '2023-01-02 08:45:00'],
    end_time=['2023-02-01 14:30:00', '2023-01-03 17:45:00', '2023-09-02 10:30:00'],
)

df = pd.DataFrame(data)

# Parse each string column into datetime values
for column in ('start_time', 'end_time'):
    df[column] = pd.to_datetime(df[column])

# Print a heading followed by the converted dataset
print("Original Dataset:", df, sep="\n")

Original Dataset:
            start_time            end_time
0  2023-01-01 12:00:00 2023-02-01 14:30:00
..                 ...                 ...
2  2023-01-02 08:45:00 2023-09-02 10:30:00

[3 rows x 2 columns]


In [30]:
import pandas as pd
import numpy as np

# Sample records; np.nan marks the entries that are missing
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, np.nan, 30, 22, np.nan],
    Salary=[50000, 60000, np.nan, 45000, 55000],
)

df = pd.DataFrame(data)

# Print a heading followed by the dataset, gaps included
print("Original Dataset with Missing Values:", df, sep="\n")

Original Dataset with Missing Values:
     Name  ...    Salary
0   Alice  ... 50,000.00
..    ...  ...       ...
4     Eva  ... 55,000.00

[5 rows x 3 columns]


In [31]:
# Produce a same-shaped boolean frame flagging which cells are missing
df.isnull()

Unnamed: 0,Name,...,Salary
0,False,...,False
...,...,...,...
4,False,...,False


In [32]:
import pandas as pd

# Revenue and cost per squad, grouped by region and team
df = pd.DataFrame({
    "Region": ['North', 'West', 'East', 'South', 'North', 'West', 'East', 'South'],
    "Team": ['One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two'],
    "Squad": ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'],
    "Revenue": [7500, 5500, 2750, 6400, 2300, 3750, 1900, 575],
    "Cost": [5200, 5100, 4400, 5300, 1250, 1300, 2100, 50]
})

# Label each row 'Profit' when revenue strictly exceeds cost, else 'Loss'.
# The comparison is evaluated for the whole column at once and the boolean
# result is translated to labels, avoiding a Python call per row.
made_profit = df['Revenue'] > df['Cost']
df['Profit'] = made_profit.map({True: 'Profit', False: 'Loss'})
df

Unnamed: 0,Region,...,Profit
0,North,...,Profit
...,...,...,...
7,South,...,Profit


In [33]:
# Lookup table from team label to team color
team_map = {"One":"Red","Two":"Blue"}
# Translate every 'Team' value through the lookup into a new column
df['Team Color'] = df['Team'].map(team_map)
# Display the frame including the new 'Team Color' column
df

Unnamed: 0,Region,...,Team Color
0,North,...,Red
...,...,...,...
7,South,...,Blue


In [34]:
# Each row's revenue as a fraction of its region's total revenue.
# groupby('Region')['Revenue'].transform('sum') yields the regional total
# aligned to every row, so the division is done for the whole column at once
# and does not depend on the index being 0..n-1.
region_revenue_total = df.groupby('Region')['Revenue'].transform('sum')
df['Revenue Share of Region'] = df['Revenue'] / region_revenue_total

# Display the rows ordered by region so each region's shares sit together
df.sort_values(by='Region')

Unnamed: 0,Region,...,Revenue Share of Region
2,East,...,0.59
...,...,...,...
5,West,...,0.41
