<a href="https://colab.research.google.com/github/Saranya9712/Pandas-Certificate-Generator/blob/main/Certificate_Generator_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install an external package "reportlab" -> its not normal python
# "reportLab" is a library to link pdf to a python program

# "pip" is a python package downloader (package manager)

!pip install reportlab



In [2]:
# mount google drive (connect this project with google drive)
# before this we easily clicked the google drive button and gained access to the files in our GDrive
# but now we are going to code our way to link to google drive

from google.colab import drive
drive.mount ('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import numpy as np
import pandas as pd

from reportlab.lib.pagesizes import landscape, A4
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

import os # is used to handle directory -> create or delete a folder


In [4]:
# import the excel file

df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/Project /dataset.xlsx')

In [5]:
df

# there are some rows that are either empty or having NaN or NaT
# NaT -> Not a Time -> the date that is supposed to be in that column is a date format. But that particular column is empty, so python will write as NaT
# NaN -> Not a Number -> the values are empty.

# In programming we always use the word "datetime" -> 2024-07-04 00:46:11 -> time is based on GMT-0 -> Grand Meridian Time



Unnamed: 0,Name,Course,CourseLevel,Date
0,Christy Cunningham,Python,Beginner,2023-09-10
1,Douglas Tucker,PYTHON,MASTER,2023-09-11
2,Travis Walters,Java,Intermediate,2023-09-12
3,Nathaniel Harris,Web Development,Advanced,2023-09-13
4,-,,Advanced,NaT
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14
6,Erik Smith,Mobile Development,Beginner,2023-09-15
7,Kristopher Johnson,Python,Beginner,2023-09-16
8,Jonathan Bucker,,,NaT
9,Robert Buck,PYTHON,Master,2023-09-17


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Name         13 non-null     object        
 1   Course       11 non-null     object        
 2   CourseLevel  12 non-null     object        
 3   Date         11 non-null     datetime64[ns]
dtypes: datetime64[ns](1), object(3)
memory usage: 544.0+ bytes


In [7]:
# Data Cleaning -> Formatting the data before doing the certificate generator logic

# 1. There is an inconsistency formating in "Courses" & "CourseLevel" columns
# 2. Date format (yyyy/mm/dd) -> we want to chance it (dd/mm/yyyy)
# 3. Empty rows (there are some rows with empty datas)

# We are going to solve these problems using Data Analysis with Python

In [8]:
df = df.dropna()


In [9]:
df

Unnamed: 0,Name,Course,CourseLevel,Date
0,Christy Cunningham,Python,Beginner,2023-09-10
1,Douglas Tucker,PYTHON,MASTER,2023-09-11
2,Travis Walters,Java,Intermediate,2023-09-12
3,Nathaniel Harris,Web Development,Advanced,2023-09-13
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14
6,Erik Smith,Mobile Development,Beginner,2023-09-15
7,Kristopher Johnson,Python,Beginner,2023-09-16
9,Robert Buck,PYTHON,Master,2023-09-17
10,Joseph Mcdonald,Java,Intermediate,2023-09-18
11,Jerome Abbott,Web Development,Advanced,2023-09-19


In [10]:
df['Date'].iloc[0]
# iloc -> index Location

# we need to change the Timestamp ('2023-09-10 00:00:00) --> "10/09/2023"

Timestamp('2023-09-10 00:00:00')

In [11]:
df['FormattedDate'] = df['Date'].dt.strftime('%d/%m/%y')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['FormattedDate'] = df['Date'].dt.strftime('%d/%m/%y')


In [12]:
df

Unnamed: 0,Name,Course,CourseLevel,Date,FormattedDate
0,Christy Cunningham,Python,Beginner,2023-09-10,10/09/23
1,Douglas Tucker,PYTHON,MASTER,2023-09-11,11/09/23
2,Travis Walters,Java,Intermediate,2023-09-12,12/09/23
3,Nathaniel Harris,Web Development,Advanced,2023-09-13,13/09/23
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14,14/09/23
6,Erik Smith,Mobile Development,Beginner,2023-09-15,15/09/23
7,Kristopher Johnson,Python,Beginner,2023-09-16,16/09/23
9,Robert Buck,PYTHON,Master,2023-09-17,17/09/23
10,Joseph Mcdonald,Java,Intermediate,2023-09-18,18/09/23
11,Jerome Abbott,Web Development,Advanced,2023-09-19,19/09/23


In [13]:
df = df.drop('Date', axis=1)

In [14]:
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/23
1,Douglas Tucker,PYTHON,MASTER,11/09/23
2,Travis Walters,Java,Intermediate,12/09/23
3,Nathaniel Harris,Web Development,Advanced,13/09/23
5,Tonya Carter,AI & Machine Learning,Beginner,14/09/23
6,Erik Smith,Mobile Development,Beginner,15/09/23
7,Kristopher Johnson,Python,Beginner,16/09/23
9,Robert Buck,PYTHON,Master,17/09/23
10,Joseph Mcdonald,Java,Intermediate,18/09/23
11,Jerome Abbott,Web Development,Advanced,19/09/23


In [15]:
# Problem 1 -> Inconsistency spelling in Courses and CourseLevel

df['Course'] = df['Course'].str.capitalize()

In [16]:
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/23
1,Douglas Tucker,Python,MASTER,11/09/23
2,Travis Walters,Java,Intermediate,12/09/23
3,Nathaniel Harris,Web development,Advanced,13/09/23
5,Tonya Carter,Ai & machine learning,Beginner,14/09/23
6,Erik Smith,Mobile development,Beginner,15/09/23
7,Kristopher Johnson,Python,Beginner,16/09/23
9,Robert Buck,Python,Master,17/09/23
10,Joseph Mcdonald,Java,Intermediate,18/09/23
11,Jerome Abbott,Web development,Advanced,19/09/23


In [17]:
df['CourseLevel'] = df['CourseLevel'].str.capitalize()

In [18]:
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/23
1,Douglas Tucker,Python,Master,11/09/23
2,Travis Walters,Java,Intermediate,12/09/23
3,Nathaniel Harris,Web development,Advanced,13/09/23
5,Tonya Carter,Ai & machine learning,Beginner,14/09/23
6,Erik Smith,Mobile development,Beginner,15/09/23
7,Kristopher Johnson,Python,Beginner,16/09/23
9,Robert Buck,Python,Master,17/09/23
10,Joseph Mcdonald,Java,Intermediate,18/09/23
11,Jerome Abbott,Web development,Advanced,19/09/23


In [19]:
fontsPath = "/content/drive/MyDrive/Colab Notebooks/Project /fonts"

In [20]:
pdfmetrics.registerFont(TTFont('Lora-Bold',os.path.join(fontsPath,'Lora-Bold.ttf')))
pdfmetrics.registerFont(TTFont('Lora-Regular',os.path.join(fontsPath,'Lora-Regular.ttf')))

In [21]:
def certificate_generator(name,courseName,courseLevel,date):

  pdfFileName = "/content/drive/MyDrive/Colab Notebooks/Project /generated-folder/" + name + "-" + courseName + "-" + courseLevel + ".pdf"


  c = canvas.Canvas(pdfFileName,pagesize=landscape(A4))

  imagePath = "/content/drive/MyDrive/Colab Notebooks/Project /certificate_template.jpg"

  c.drawImage(imagePath,0,0,width=A4[1],height=A4[0])

  # width
  center_x = c._pagesize[0] / 2

  # height
  center_y = c._pagesize[1] / 2

  # 1. Name
  c.setFont("Lora-Bold",30)
  c.drawCentredString(center_x, center_y - 46, name)

# 2. CourseName & CourseLevel
  c.setFont("Lora-Bold",28)
  c.drawCentredString(center_x, center_y - 105, courseName + " - " + courseLevel)

# 3. Date
  c.setFont("Lora-Bold",17)
  c.drawCentredString(center_x + 190, center_y - 160, date)

# 4. Cert ID
  cert_id = "Cert ID: " + str(int(pd.Timestamp.now().timestamp()))
  c.setFont("Lora-Bold",12)
  c.drawCentredString(center_x + 266, center_y - 230, cert_id.upper())


  c.save()

In [22]:
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/23
1,Douglas Tucker,Python,Master,11/09/23
2,Travis Walters,Java,Intermediate,12/09/23
3,Nathaniel Harris,Web development,Advanced,13/09/23
5,Tonya Carter,Ai & machine learning,Beginner,14/09/23
6,Erik Smith,Mobile development,Beginner,15/09/23
7,Kristopher Johnson,Python,Beginner,16/09/23
9,Robert Buck,Python,Master,17/09/23
10,Joseph Mcdonald,Java,Intermediate,18/09/23
11,Jerome Abbott,Web development,Advanced,19/09/23


In [30]:
# certificate_generator function needs 4 inputs --> (name,courseName,courseLevel,date)

# iterrows --> iterable(loopable) rows
for index,row in df.iterrows():
  certificate_generator(row["Name"],row["Course"],row["CourseLevel"],row["FormattedDate"])

In [24]:
# certificate_generator function needs 4 inputs --> (name,courseName,courseLevel,date)
certificate_generator("Saranya","Python","Beginner","07/04/2024")