In [None]:


## **Introduction to Pandas**

## **Understanding Series and DataFrame**

Pandas is a powerful open-source data analysis and manipulation library 
for Python. It is built on top of NumPy and provides high-level data 
structures and tools designed for practical and efficient data analysis.
Pandas is widely used in data science, machine learning, and 
data visualization projects due to its ease of use and versatility.


### **1. Series**
- A Series is a one-dimensional labeled array.
- Syntax: `pandas.Series(data, index=index)`

- Key Characteristics:
  - Homogeneous data type: all values share a single dtype (mixed values fall back to the generic `object` dtype).
  - Indexed like a dictionary for quick access.

### **2. DataFrame**
- A DataFrame is a two-dimensional labeled data structure with rows and columns.
- Syntax: `pandas.DataFrame(data, index=index, columns=columns)`
  
- Key Characteristics:
  - Columns can have different data types.
  - Offers functionalities for data manipulation, analysis, and visualization.



In [None]:
import pandas as pd

In [None]:
import pandas as pd
# Build a one-dimensional labelled Series: the integers 1-5 keyed by letters.
labels = list('abcde')
data = list(range(1, 6))
series = pd.Series(data, index=labels)
print(series)


In [None]:
import pandas as pd
# Values 1-5 paired with the labels 'a'-'e', passed as explicit keywords.
data = [1, 2, 3, 4, 5]
series = pd.Series(data=data, index=['a', 'b', 'c', 'd', 'e'])
print(series)


In [None]:
# Mixing ints, floats and a string makes pandas store the values with the
# generic `object` dtype rather than a numeric one.
data = [1, 2.4, 3.3, "abc", 5]
labels = ['a', 'b', 'c', 'd', 'e']
series = pd.Series(data=data, index=labels)
print(series)


In [None]:
# Each dictionary key becomes a column; its list supplies that column's rows.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data)
print(df)

## **Creating DataFrames**

### **1. From Dictionaries**

In [None]:
import pandas as pd

In [None]:
# Column-oriented input: one list per column, all of equal length.
names = ['John', 'Anna', 'Peter']
ages = [28, 24, 35]
professions = ['Engineer', 'Doctor', 'Lawyer']
data = {'Name': names, 'Age': ages, 'Profession': professions}

# Show the raw dictionary first, then the table pandas builds from it.
print(data)
df = pd.DataFrame(data)
print(df)


### **2. From Arrays**

In [None]:
import numpy as np
# A 2x3 integer array becomes two rows under three named columns.
data = np.arange(1, 7).reshape(2, 3)
df = pd.DataFrame(data, columns=list('ABC'))
print(df)

### Indexing in Series

In [None]:
# A Series with explicit string labels, so label and position lookups differ.
data = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])

# Accessing by label
print(data['b'])  # 20

# Accessing by position: use .iloc explicitly. Plain data[2] on a
# label-indexed Series relies on a deprecated integer-position fallback
# (FutureWarning in pandas 2.x, KeyError once the fallback is removed).
print(data.iloc[2])  # 30


### Indexing in DataFrame

In [None]:
# Example table (three people, three attributes) for the indexing demos.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Select a single column by its label; the result is a Series.
name_column = df.loc[:, 'Name']
print(name_column)

In [None]:
# Passing a list of labels selects several columns and returns a DataFrame.
selected_columns = ['Name', 'Age']
print(df[selected_columns])  # Outputs "Name" and "Age" columns


In [None]:
# .loc looks a row up by its index label (here the default integer label 1).
row_label = 1
print(df.loc[row_label])  # Row with index 1 (Bob)


In [None]:
# .iloc looks a row up by its zero-based position, regardless of labels.
row_position = 2
print(df.iloc[row_position])  # Third row (Charlie)

#### Slicing


In [None]:
import pandas as pd

# Five values labelled 'a'..'e' to contrast positional and label slices.
data = pd.Series([10, 20, 30, 40, 50], index=list('abcde'))

# Positional slice: the stop position is excluded.
print(data.iloc[1:4])  # Outputs 20,30,40

# Label slice: both end labels are included.
print(data.loc['b':'d'])  # Outputs 20,30,40

# A negative step walks the Series backwards.
print(data.iloc[::-1])  # Outputs elements in reverse order

# A step of two keeps every second element.
print(data.iloc[::2])  # Every second element


### **3. From CSV Files** 

In [None]:
import pandas as pd

In [None]:
# Load the loan training CSV from the Kaggle input directory and preview
# its first ten rows.
LOAN_TRAIN_CSV = "/kaggle/input/home-loan-approval-train-dataset/loan_sanction_train.csv"
df = pd.read_csv(LOAN_TRAIN_CSV)
df.head(10)

In [None]:
# Preview the first five rows (the default for head()).
df.head(n=5)

In [None]:
# Preview the first ten rows.
df.head(n=10)

In [None]:
# Preview the last five rows (the default for tail()).
df.tail(n=5)

In [None]:
# Preview the last ten rows.
df.tail(n=10)

In [None]:
# Dimensions of the table as a (rows, columns) tuple.
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
numeric_summary = df.describe()
numeric_summary

In [None]:
# Shorten the co-applicant income column name to 'CA'.
# rename() returns a new frame; rebinding `df` instead of using inplace=True
# avoids mutating the object that earlier cells displayed and keeps the call
# chainable. Re-running is harmless: a missing source label is ignored.
df = df.rename(columns={'CoapplicantIncome': 'CA'})
df.head()

In [None]:
# Remove the 'CA' column. drop() returns a new frame, so rebind `df` rather
# than mutating in place. errors='ignore' keeps the cell re-runnable: without
# it, a second execution raises KeyError because 'CA' is already gone.
df = df.drop(columns=['CA'], errors='ignore')
# Rows can be removed the same way by index label, e.g.
# df.drop(index=[0, 1]) drops rows 0 and 1.
df.head()

In [None]:
# Print a concise per-column summary of the table (the call itself returns
# None; the summary is written to the cell output).
df.info()

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the batch report workbook (first sheet by default) and display it.
BATCH_REPORT_XLSX = "/kaggle/input/model12/tensorflow2/default/1/Batch_report (13).xlsx"
df1 = pd.read_excel(BATCH_REPORT_XLSX)
df1