In [1]:
import pandas as pd
import numpy as np

In [3]:
# ==============================
# 1. Ways to Create a Series
# ==============================
# Every Series is printed right after it is built so each construction
# style can be read next to its result.
print("\n--- Series Creation ---")

# List input: values receive the default 0..n-1 integer index
s1 = pd.Series(data=[10, 20, 30, 40])
print("Series from list:\n", s1)

# NumPy array input: behaves like the list case
s2 = pd.Series(data=np.arange(1, 5))
print("Series from NumPy array:\n", s2)

# Dictionary input: the keys are used as the index labels
s3 = pd.Series(dict(a=100, b=200, c=300))
print("Series from dict:\n", s3)

# Scalar input: the value is repeated once per index label
s4 = pd.Series(5, index=list("xyz"))
print("Series from scalar:\n", s4)


--- Series Creation ---
Series from list:
 0    10
1    20
2    30
3    40
dtype: int64
Series from NumPy array:
 0    1
1    2
2    3
3    4
dtype: int64
Series from dict:
 a    100
b    200
c    300
dtype: int64
Series from scalar:
 x    5
y    5
z    5
dtype: int64


In [4]:
# Common operations on Series
# Positional access goes through .iloc: a plain s1[1] with an integer key is a
# label lookup, and only matches the position here because s1 carries the
# default 0..n-1 index.
print("\n--- Series Operations ---")
print("Head:\n", s1.head(2))                        # first two entries
print("Indexing single element:", s1.iloc[1])       # by position
print("Slicing:\n", s1.iloc[1:3])                   # positional slice, end excluded
print("Mathematical ops:\n", s1 * 2)                # element-wise
print("Boolean filtering:\n", s1[s1 > 15])          # keep entries where the mask is True
print("Descriptive stats:\n", s1.describe())        # count/mean/std/min/quartiles/max


--- Series Operations ---
Head:
 0    10
1    20
dtype: int64
Indexing single element: 20
Slicing:
 1    20
2    30
dtype: int64
Mathematical ops:
 0    20
1    40
2    60
3    80
dtype: int64
Boolean filtering:
 1    20
2    30
3    40
dtype: int64
Descriptive stats:
 count     4.000000
mean     25.000000
std      12.909944
min      10.000000
25%      17.500000
50%      25.000000
75%      32.500000
max      40.000000
dtype: float64


In [5]:
# ==============================
# 2. Ways to Create a DataFrame
# ==============================
# Only builds df1..df4; nothing is displayed here apart from the banner.
print("\n--- DataFrame Creation ---")

# Dict of lists: each key is a column name, each list holds that column's values
df1 = pd.DataFrame(dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
))

# Dict of Series: the Series share the labels x/y/z, which become the row index
df2 = pd.DataFrame({
    "A": pd.Series([1, 2, 3], index=list("xyz")),
    "B": pd.Series([10, 20, 30], index=list("xyz")),
})

# List of dicts: one dict per row, the keys become the columns
df3 = pd.DataFrame(data=[
    dict(id=1, value=100),
    dict(id=2, value=200),
])

# 2D NumPy array: 0..11 laid out as 3 rows x 4 columns, with explicit column names
df4 = pd.DataFrame(
    data=np.arange(12).reshape(3, -1),
    columns=list("ABCD"),
)


--- DataFrame Creation ---


In [6]:
# Show each of the four example DataFrames under its own caption.
for caption, frame in (
    ("DataFrame from dict of lists:\n", df1),
    ("DataFrame from dict of Series:\n", df2),
    ("DataFrame from list of dicts:\n", df3),
    ("DataFrame from NumPy array:\n", df4),
):
    print(caption, frame)

DataFrame from dict of lists:
       Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
DataFrame from dict of Series:
    A   B
x  1  10
y  2  20
z  3  30
DataFrame from list of dicts:
    id  value
0   1    100
1   2    200
DataFrame from NumPy array:
    A  B   C   D
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11


In [7]:
# Common operations on DataFrame
# All calls below return new objects; df1 itself is never modified.
print("\n--- DataFrame Operations ---")
print("Head:\n", df1.head(2))
# DataFrame.info() writes its report to stdout and returns None, so it is
# called bare; wrapping it in print() would add a stray "None" line.
print("Info:\n")
df1.info()
print("Describe:\n", df1.describe())
print("Select column:\n", df1["Name"])
print("Select multiple columns:\n", df1[["Name", "Age"]])
print("Row selection (iloc):\n", df1.iloc[1])            # whole row at position 1
# .loc[row_label, column_label] yields a single cell value, not a whole row
print("Row selection (loc):\n", df1.loc[0, "Name"])
print("Filtering:\n", df1[df1["Age"] > 25])               # rows where the mask is True
print("Sorting:\n", df1.sort_values("Age", ascending=False))
# assign() returns a copy that includes the extra column
print("Adding new column:\n", df1.assign(Salary=[5000, 6000, 7000]))
print("Group by Age:\n", df1.groupby("Age").size())      # row count per distinct Age
print("Missing values check:\n", df1.isnull())           # boolean mask, True where missing


--- DataFrame Operations ---
Head:
     Name  Age
0  Alice   25
1    Bob   30
Info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes
None
Describe:
         Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0
Select column:
 0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
Select multiple columns:
       Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
Row selection (iloc):
 Name    Bob
Age      30
Name: 1, dtype: object
Row selection (loc):
 Alice
Filtering:
       Name  Age
1      Bob   30
2  Charlie   35
Sorting:
       Name  Age
2  Charlie   35
1      Bob   30
0    Alice   25
Adding new column:
       Name  Age  Salary
0    Alice   25    5000
1      Bob   30    6

In [None]:
import pandas as pd
import sqlite3

# ----------------------------
# 1. Reading Data
# ----------------------------
# The input files are referenced by relative path.
# NOTE(review): data.csv, data.xlsx, data.json and example.db (with a table
# named my_table) are assumed to exist next to the notebook; confirm.

# Read CSV file
df_csv = pd.read_csv("data.csv")
print("CSV Data:\n", df_csv.head())

# Read Excel file (requires openpyxl for .xlsx)
df_excel = pd.read_excel("data.xlsx")
print("Excel Data:\n", df_excel.head())

# Read JSON file
df_json = pd.read_json("data.json")
print("JSON Data:\n", df_json.head())

# Read SQL database
# (Example: SQLite connection)
conn = sqlite3.connect("example.db")
try:
    df_sql = pd.read_sql("SELECT * FROM my_table", conn)
finally:
    # Release the connection even when the query raises,
    # so a failed read does not leave the database handle open.
    conn.close()
print("SQL Data:\n", df_sql.head())


# ----------------------------
# 2. Writing Data
# ----------------------------

# Write to CSV (index=False leaves the row index out of the file)
df_csv.to_csv("output.csv", index=False)

# Write to Excel (index=False leaves the row index out of the sheet)
df_excel.to_excel("output.xlsx", index=False)

# Write to JSON as a list of row objects, pretty-printed with 4-space indent
df_json.to_json("output.json", orient="records", indent=4)


In [8]:
import pandas as pd

# ----------------------------
# Create a sample DataFrame
# ----------------------------
# Six employee records; each dict key becomes a column.
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva", "Frank"],
    "Age": [25, 30, 35, 40, 28, 33],
    "Department": ["HR", "IT", "Finance", "IT", "HR", "Finance"],
    "Salary": [50000, 60000, 75000, 80000, 55000, 70000]
}

df = pd.DataFrame(data)

# ----------------------------
# 1. Previewing data
# ----------------------------
print("First 5 rows:\n", df.head())    # head() defaults to 5 rows
print("\nLast 5 rows:\n", df.tail())   # tail() defaults to 5 rows

# ----------------------------
# 2. Info and shape
# ----------------------------
print("\nInfo about DataFrame:")
# info() prints the column / non-null / dtype summary itself and returns None,
# so it is called bare; print(df.info()) would add a stray "None" line.
df.info()

print("\nShape of DataFrame (rows, columns):", df.shape)

# ----------------------------
# 3. Descriptive statistics
# ----------------------------
# describe() summarises the numeric columns only (Age and Salary here)
print("\nStatistical Summary:\n", df.describe())

# ----------------------------
# 4. Checking columns and index
# ----------------------------
print("\nColumns:", df.columns)
print("Index:", df.index)


First 5 rows:
       Name  Age Department  Salary
0    Alice   25         HR   50000
1      Bob   30         IT   60000
2  Charlie   35    Finance   75000
3    David   40         IT   80000
4      Eva   28         HR   55000

Last 5 rows:
       Name  Age Department  Salary
1      Bob   30         IT   60000
2  Charlie   35    Finance   75000
3    David   40         IT   80000
4      Eva   28         HR   55000
5    Frank   33    Finance   70000

Info about DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        6 non-null      object
 1   Age         6 non-null      int64 
 2   Department  6 non-null      object
 3   Salary      6 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 324.0+ bytes
None

Shape of DataFrame (rows, columns): (6, 4)

Statistical Summary:
              Age        Salary
count   6.000000      6.00000